From cb578e32ab1851ff40b495108c681a226a2f27de Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 10 Dec 2025 16:59:47 +0100 Subject: [PATCH 01/10] Java: Move interpretModelForTest into shared code. --- .../code/java/dataflow/ExternalFlow.qll | 14 +- .../internal/ExternalFlowExtensions.qll | 6 + shared/mad/codeql/mad/static/MaD.qll | 124 ++++++++++++++++++ 3 files changed, 138 insertions(+), 6 deletions(-) create mode 100644 shared/mad/codeql/mad/static/MaD.qll diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index e612239a76ec..372b3a22d107 100644 --- a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -98,8 +98,13 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.ExternalFlowExtensions as Extensions +private import internal.ExternalFlowExtensions private import codeql.mad.ModelValidation as SharedModelVal +private import codeql.mad.static.MaD as SharedMaD + +private module MaD = SharedMaD::ModelsAsData; + +import MaD /** * A class for activating additional model rows. @@ -214,11 +219,12 @@ predicate summaryModel( * This predicate should only be used in tests. 
*/ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { + MaD::interpretModelForTest(madId, model) + or exists( string package, string type, boolean subtypes, string name, string signature, string ext, string output, string kind, string provenance | - sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, madId) or Extensions::experimentalSourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, _, madId) | @@ -231,7 +237,6 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { string package, string type, boolean subtypes, string name, string signature, string ext, string input, string kind, string provenance | - sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, madId) or Extensions::experimentalSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, _, madId) | @@ -244,8 +249,6 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { string package, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance | - summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, provenance, - madId) or Extensions::experimentalSummaryModel(package, type, subtypes, name, signature, ext, input, output, kind, provenance, _, madId) | @@ -253,7 +256,6 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { "Summary: " + package + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + ext + "; " + input + "; " + output + "; " + kind + "; " + provenance ) - //TODO: possibly barrier models? } /** Holds if a neutral model exists for the given parameters. 
*/ diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll b/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll index 5386d916e240..c01766c317a2 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll @@ -4,6 +4,8 @@ overlay[local?] module; +private import codeql.mad.static.MaD as SharedMaD + /** * Holds if a source model exists for the given parameters. */ @@ -93,3 +95,7 @@ extensible predicate experimentalSummaryModel( string input, string output, string kind, string provenance, string filter, QlBuiltins::ExtensionId madId ); + +module Extensions implements SharedMaD::ExtensionsSig { + import ExternalFlowExtensions +} diff --git a/shared/mad/codeql/mad/static/MaD.qll b/shared/mad/codeql/mad/static/MaD.qll new file mode 100644 index 000000000000..5d58b74fe4ef --- /dev/null +++ b/shared/mad/codeql/mad/static/MaD.qll @@ -0,0 +1,124 @@ +overlay[local?] +module; + +signature module ExtensionsSig { + /** + * Holds if a source model exists for the given parameters. + */ + predicate sourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId + ); + + /** + * Holds if a sink model exists for the given parameters. + */ + predicate sinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, QlBuiltins::ExtensionId madId + ); + + /** + * Holds if a barrier model exists for the given parameters. + */ + predicate barrierModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId + ); + + /** + * Holds if a barrier guard model exists for the given parameters. 
+ */ + predicate barrierGuardModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance, + QlBuiltins::ExtensionId madId + ); + + /** + * Holds if a summary model exists for the given parameters. + */ + predicate summaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId + ); + + /** + * Holds if a neutral model exists for the given parameters. + */ + predicate neutralModel( + string namespace, string type, string name, string signature, string kind, string provenance + ); +} + +module ModelsAsData { + /** + * Holds if the given extension tuple `madId` should pretty-print as `model`. + * + * Barrier models are included for completeness even though they will not show up in a path. + * + * This predicate should only be used in tests. + */ + predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance + | + Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, + provenance, madId) + | + model = + "Source: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + + "; " + ext + "; " + output + "; " + kind + "; " + provenance + ) + or + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance + | + Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, + provenance, madId) + | + model = + "Sink: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + + ext + "; " + input + "; " + kind + "; " + provenance + ) + or + exists( + string namespace, string 
type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance + | + Extensions::barrierModel(namespace, type, subtypes, name, signature, ext, output, kind, + provenance, madId) + | + model = + "Barrier: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + + "; " + ext + "; " + output + "; " + kind + "; " + provenance + ) + or + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance + | + Extensions::barrierGuardModel(namespace, type, subtypes, name, signature, ext, input, + acceptingvalue, kind, provenance, madId) + | + model = + "Barrier Guard: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + + signature + "; " + ext + "; " + input + "; " + acceptingvalue + "; " + kind + "; " + + provenance + ) + or + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance + | + Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, + provenance, madId) + | + model = + "Summary: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + + "; " + ext + "; " + input + "; " + output + "; " + kind + "; " + provenance + ) + } +} From f0e7f1af2c22c0f79992bf4dbebe5e05911b52ad Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 08:14:37 +0100 Subject: [PATCH 02/10] C++/C#/Go: Align ExternalFlowExtensions with Java. 
--- .../internal/ExternalFlowExtensions.qll | 29 +++++++++++++++++++ .../internal/ExternalFlowExtensions.qll | 22 ++++++++++++++ .../internal/ExternalFlowExtensions.qll | 22 ++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll index cd1af34c8d8a..165970206203 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll @@ -2,6 +2,8 @@ * This module provides extensible predicates for defining MaD models. */ +private import codeql.mad.static.MaD as SharedMaD + /** * Holds if an external source model exists for the given parameters. */ @@ -18,6 +20,22 @@ extensible predicate sinkModel( string input, string kind, string provenance, QlBuiltins::ExtensionId madId ); +/** + * Holds if a barrier model exists for the given parameters. + */ +extensible predicate barrierModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if a barrier guard model exists for the given parameters. + */ +extensible predicate barrierGuardModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId +); + /** * Holds if an external summary model exists for the given parameters. */ @@ -25,3 +43,14 @@ extensible predicate summaryModel( string namespace, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId ); + +/** + * Holds if a neutral model exists for the given parameters. 
+ */ +extensible predicate neutralModel( + string namespace, string type, string name, string signature, string kind, string provenance +); + +module Extensions implements SharedMaD::ExtensionsSig { + import ExternalFlowExtensions +} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll index f761a0a9f5cd..f845ddf79512 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll @@ -2,6 +2,8 @@ * This module provides extensible predicates for defining MaD models. */ +private import codeql.mad.static.MaD as SharedMaD + /** * Holds if a source model exists for the given parameters. */ @@ -18,6 +20,22 @@ extensible predicate sinkModel( string input, string kind, string provenance, QlBuiltins::ExtensionId madId ); +/** + * Holds if a barrier model exists for the given parameters. + */ +extensible predicate barrierModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if a barrier guard model exists for the given parameters. + */ +extensible predicate barrierGuardModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId +); + /** * Holds if a summary model exists for the given parameters. 
*/ @@ -32,3 +50,7 @@ extensible predicate summaryModel( extensible predicate neutralModel( string namespace, string type, string name, string signature, string kind, string provenance ); + +module Extensions implements SharedMaD::ExtensionsSig { + import ExternalFlowExtensions +} diff --git a/go/ql/lib/semmle/go/dataflow/internal/ExternalFlowExtensions.qll b/go/ql/lib/semmle/go/dataflow/internal/ExternalFlowExtensions.qll index b1e1c906028c..588951944e1b 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/ExternalFlowExtensions.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/ExternalFlowExtensions.qll @@ -2,6 +2,8 @@ * This module provides extensible predicates for defining MaD models. */ +private import codeql.mad.static.MaD as SharedMaD + /** * Holds if a source model exists for the given parameters. */ @@ -18,6 +20,22 @@ extensible predicate sinkModel( string input, string kind, string provenance, QlBuiltins::ExtensionId madId ); +/** + * Holds if a barrier model exists for the given parameters. + */ +extensible predicate barrierModel( + string package, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if a barrier guard model exists for the given parameters. + */ +extensible predicate barrierGuardModel( + string package, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId +); + /** * Holds if a summary model exists for the given parameters. */ @@ -37,3 +55,7 @@ extensible predicate neutralModel( * Holds if the package `package` is part of the group `group`. 
*/ extensible predicate packageGrouping(string group, string package); + +module Extensions implements SharedMaD::ExtensionsSig { + import ExternalFlowExtensions +} From 0915db4f6bdcffc6a9f311f63af95bd23d52f499 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 08:20:13 +0100 Subject: [PATCH 03/10] C++/C#/Go: Use shared interpretModelForTest. --- .../semmle/code/cpp/dataflow/ExternalFlow.qll | 50 +++---------------- .../csharp/dataflow/internal/ExternalFlow.qll | 41 ++------------- go/ql/lib/semmle/go/dataflow/ExternalFlow.qll | 49 +++--------------- 3 files changed, 18 insertions(+), 122 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index 69e32d23ec1c..b31578492bbb 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -101,9 +101,14 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.ExternalFlowExtensions as Extensions +private import internal.ExternalFlowExtensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.util.Unit +private import codeql.mad.static.MaD as SharedMaD + +private module MaD = SharedMaD::ModelsAsData; + +import MaD /** * A unit class for adding additional source model rows. @@ -230,49 +235,6 @@ private predicate summaryModel0( ) } -/** - * Holds if the given extension tuple `madId` should pretty-print as `model`. - * - * This predicate should only be used in tests. 
- */ -predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance - | - Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, - provenance, madId) - | - model = - "Source: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " - + ext + "; " + output + "; " + kind + "; " + provenance - ) - or - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance - | - Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, - madId) - | - model = - "Sink: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + - ext + "; " + input + "; " + kind + "; " + provenance - ) - or - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance - | - Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) - | - model = - "Summary: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + - "; " + ext + "; " + input + "; " + output + "; " + kind + "; " + provenance - ) -} - /** * Holds if `input` is `input0`, but with all occurrences of `@` replaced * by `n` repetitions of `*` (and similarly for `output` and `output0`). 
diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll index 87b28b76e99a..04933e112824 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll @@ -101,44 +101,11 @@ private import semmle.code.csharp.dispatch.OverridableCallable private import semmle.code.csharp.frameworks.System private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax private import codeql.mad.ModelValidation as SharedModelVal +private import codeql.mad.static.MaD as SharedMaD -/** - * Holds if the given extension tuple `madId` should pretty-print as `model`. - * - * This predicate should only be used in tests. - */ -predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance - | - sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, madId) and - model = - "Source: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " - + ext + "; " + output + "; " + kind + "; " + provenance - ) - or - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance - | - sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, madId) and - model = - "Sink: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + - ext + "; " + input + "; " + kind + "; " + provenance - ) - or - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance - | - summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, 
provenance, - madId) and - model = - "Summary: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " + signature + - "; " + ext + "; " + input + "; " + output + "; " + kind + "; " + provenance - ) -} +private module MaD = SharedMaD::ModelsAsData; + +import MaD private predicate relevantNamespace(string namespace) { sourceModel(namespace, _, _, _, _, _, _, _, _, _) or diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index 3228f4248859..f09d6b15006c 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -86,7 +86,7 @@ */ private import go -import internal.ExternalFlowExtensions as FlowExtensions +private import internal.ExternalFlowExtensions private import FlowSummary as FlowSummary private import internal.DataFlowPrivate private import internal.FlowSummaryImpl @@ -94,6 +94,13 @@ private import internal.FlowSummaryImpl::Public as Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External private import codeql.mad.ModelValidation as SharedModelVal +private import codeql.mad.static.MaD as SharedMaD + +private module MaD = SharedMaD::ModelsAsData; + +import MaD + +module FlowExtensions = Extensions; /** Gets the prefix for a group of packages. */ private string groupPrefix() { result = "group:" } @@ -178,46 +185,6 @@ predicate neutralModel( ) } -/** - * Holds if the given extension tuple `madId` should pretty-print as `model`. - * - * This predicate should only be used in tests. 
- */ -predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance - | - FlowExtensions::sourceModel(package, type, subtypes, name, signature, ext, output, kind, - provenance, madId) and - model = - "Source: " + package + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + - ext + "; " + output + "; " + kind + "; " + provenance - ) - or - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance - | - FlowExtensions::sinkModel(package, type, subtypes, name, signature, ext, input, kind, - provenance, madId) and - model = - "Sink: " + package + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + - ext + "; " + input + "; " + kind + "; " + provenance - ) - or - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance - | - FlowExtensions::summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) and - model = - "Summary: " + package + "; " + type + "; " + subtypes + "; " + name + "; " + signature + "; " + - ext + "; " + input + "; " + output + "; " + kind + "; " + provenance - ) -} - bindingset[p] private string cleanPackage(string p) { exists(string noPrefix | From 3b334ea2158cce13da04a816d5e705ea6853f37d Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 08:44:09 +0100 Subject: [PATCH 04/10] Java/C#: Share model coverage code. 
--- .../csharp/dataflow/internal/ExternalFlow.qll | 56 ------------------ .../code/java/dataflow/ExternalFlow.qll | 57 ------------------ shared/mad/codeql/mad/static/MaD.qll | 59 +++++++++++++++++++ 3 files changed, 59 insertions(+), 113 deletions(-) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll index 04933e112824..5b5d3f329dfe 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll @@ -107,62 +107,6 @@ private module MaD = SharedMaD::ModelsAsData; import MaD -private predicate relevantNamespace(string namespace) { - sourceModel(namespace, _, _, _, _, _, _, _, _, _) or - sinkModel(namespace, _, _, _, _, _, _, _, _, _) or - summaryModel(namespace, _, _, _, _, _, _, _, _, _, _) -} - -private predicate namespaceLink(string shortns, string longns) { - relevantNamespace(shortns) and - relevantNamespace(longns) and - longns.prefix(longns.indexOf(".")) = shortns -} - -private predicate canonicalNamespace(string namespace) { - relevantNamespace(namespace) and not namespaceLink(_, namespace) -} - -private predicate canonicalNamespaceLink(string namespace, string subns) { - canonicalNamespace(namespace) and - (subns = namespace or namespaceLink(namespace, subns)) -} - -/** - * Holds if MaD framework coverage of `namespace` is `n` api endpoints of the - * kind `(kind, part)`, and `namespaces` is the number of subnamespaces of - * `namespace` which have MaD framework coverage (including `namespace` - * itself). 
- */ -predicate modelCoverage(string namespace, int namespaces, string kind, string part, int n) { - namespaces = strictcount(string subns | canonicalNamespaceLink(namespace, subns)) and - ( - part = "source" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string output, string provenance | - canonicalNamespaceLink(namespace, subns) and - sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance, _) - ) - or - part = "sink" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string input, string provenance | - canonicalNamespaceLink(namespace, subns) and - sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance, _) - ) - or - part = "summary" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string input, string output, string provenance | - canonicalNamespaceLink(namespace, subns) and - summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance, _) - ) - ) -} - /** Provides a query predicate to check the MaD models for validation errors. */ module ModelValidation { private predicate getRelevantAccessPath(string path) { diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index 372b3a22d107..ca02010dcecb 100644 --- a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -261,63 +261,6 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { /** Holds if a neutral model exists for the given parameters. 
*/ predicate neutralModel = Extensions::neutralModel/6; -private predicate relevantPackage(string package) { - sourceModel(package, _, _, _, _, _, _, _, _, _) or - sinkModel(package, _, _, _, _, _, _, _, _, _) or - summaryModel(package, _, _, _, _, _, _, _, _, _, _) -} - -private predicate packageLink(string shortpkg, string longpkg) { - relevantPackage(shortpkg) and - relevantPackage(longpkg) and - longpkg.prefix(longpkg.indexOf(".")) = shortpkg -} - -private predicate canonicalPackage(string package) { - relevantPackage(package) and not packageLink(_, package) -} - -private predicate canonicalPkgLink(string package, string subpkg) { - canonicalPackage(package) and - (subpkg = package or packageLink(package, subpkg)) -} - -/** - * Holds if MaD framework coverage of `package` is `n` api endpoints of the - * kind `(kind, part)`, and `pkgs` is the number of subpackages of `package` - * which have MaD framework coverage (including `package` itself). - */ -predicate modelCoverage(string package, int pkgs, string kind, string part, int n) { - pkgs = strictcount(string subpkg | canonicalPkgLink(package, subpkg)) and - ( - part = "source" and - n = - strictcount(string subpkg, string type, boolean subtypes, string name, string signature, - string ext, string output, string provenance | - canonicalPkgLink(package, subpkg) and - sourceModel(subpkg, type, subtypes, name, signature, ext, output, kind, provenance, _) - ) - or - part = "sink" and - n = - strictcount(string subpkg, string type, boolean subtypes, string name, string signature, - string ext, string input, string provenance | - canonicalPkgLink(package, subpkg) and - sinkModel(subpkg, type, subtypes, name, signature, ext, input, kind, provenance, _) - ) - or - part = "summary" and - n = - strictcount(string subpkg, string type, boolean subtypes, string name, string signature, - string ext, string input, string output, string provenance | - canonicalPkgLink(package, subpkg) and - summaryModel(subpkg, type, subtypes, 
name, signature, ext, input, output, kind, provenance, - _) - ) - // TODO: possibly barrier models? - ) -} - /** Provides a query predicate to check the MaD models for validation errors. */ module ModelValidation { private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax diff --git a/shared/mad/codeql/mad/static/MaD.qll b/shared/mad/codeql/mad/static/MaD.qll index 5d58b74fe4ef..6a8598cf6d9f 100644 --- a/shared/mad/codeql/mad/static/MaD.qll +++ b/shared/mad/codeql/mad/static/MaD.qll @@ -121,4 +121,63 @@ module ModelsAsData { "; " + ext + "; " + input + "; " + output + "; " + kind + "; " + provenance ) } + + private predicate relevantNamespace(string namespace) { + Extensions::sourceModel(namespace, _, _, _, _, _, _, _, _, _) or + Extensions::sinkModel(namespace, _, _, _, _, _, _, _, _, _) or + Extensions::summaryModel(namespace, _, _, _, _, _, _, _, _, _, _) + } + + private predicate namespaceLink(string shortns, string longns) { + relevantNamespace(shortns) and + relevantNamespace(longns) and + longns.prefix(longns.indexOf(".")) = shortns + } + + private predicate canonicalNamespace(string namespace) { + relevantNamespace(namespace) and not namespaceLink(_, namespace) + } + + private predicate canonicalNamespaceLink(string namespace, string subns) { + canonicalNamespace(namespace) and + (subns = namespace or namespaceLink(namespace, subns)) + } + + /** + * Holds if MaD framework coverage of `namespace` is `n` api endpoints of the + * kind `(kind, part)`, and `namespaces` is the number of subnamespaces of + * `namespace` which have MaD framework coverage (including `namespace` + * itself). 
+ */ + predicate modelCoverage(string namespace, int namespaces, string kind, string part, int n) { + namespaces = strictcount(string subns | canonicalNamespaceLink(namespace, subns)) and + ( + part = "source" and + n = + strictcount(string subns, string type, boolean subtypes, string name, string signature, + string ext, string output, string provenance | + canonicalNamespaceLink(namespace, subns) and + Extensions::sourceModel(subns, type, subtypes, name, signature, ext, output, kind, + provenance, _) + ) + or + part = "sink" and + n = + strictcount(string subns, string type, boolean subtypes, string name, string signature, + string ext, string input, string provenance | + canonicalNamespaceLink(namespace, subns) and + Extensions::sinkModel(subns, type, subtypes, name, signature, ext, input, kind, + provenance, _) + ) + or + part = "summary" and + n = + strictcount(string subns, string type, boolean subtypes, string name, string signature, + string ext, string input, string output, string provenance | + canonicalNamespaceLink(namespace, subns) and + Extensions::summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, + provenance, _) + ) + ) + } } From 47dcf05a3244b066e023117f2f00f5d718de61b3 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 08:59:17 +0100 Subject: [PATCH 05/10] C++/Go/Java: Don't import top-level extensible predicates. 
--- cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll | 2 +- go/ql/lib/semmle/go/dataflow/ExternalFlow.qll | 2 +- java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index b31578492bbb..eb401f1c36c3 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -101,7 +101,7 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.ExternalFlowExtensions +private import internal.ExternalFlowExtensions::Extensions as Extensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.util.Unit private import codeql.mad.static.MaD as SharedMaD diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index f09d6b15006c..42b3f472a59a 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -86,7 +86,7 @@ */ private import go -private import internal.ExternalFlowExtensions +private import internal.ExternalFlowExtensions::Extensions as Extensions private import FlowSummary as FlowSummary private import internal.DataFlowPrivate private import internal.FlowSummaryImpl diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index ca02010dcecb..786f76fc679e 100644 --- a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -98,7 +98,7 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import 
internal.FlowSummaryImpl::Private::External -private import internal.ExternalFlowExtensions +private import internal.ExternalFlowExtensions::Extensions as Extensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.mad.static.MaD as SharedMaD From 07252519c8ffbb70c82d5ab631b6dc7fa6c6de14 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 09:59:13 +0100 Subject: [PATCH 06/10] Java/C++: Thread additional models through the shared lib. --- .../semmle/code/cpp/dataflow/ExternalFlow.qll | 158 ++++++++---------- .../csharp/dataflow/internal/ExternalFlow.qll | 20 +-- .../dataflow/internal/FlowSummaryImpl.qll | 12 +- go/ql/lib/semmle/go/dataflow/ExternalFlow.qll | 4 +- .../code/java/dataflow/ExternalFlow.qll | 85 +++++----- .../dataflow/internal/FlowSummaryImpl.qll | 16 +- shared/mad/codeql/mad/static/MaD.qll | 101 +++++++++-- 7 files changed, 226 insertions(+), 170 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index eb401f1c36c3..cf211b4397d8 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -106,10 +106,6 @@ private import codeql.mad.ModelValidation as SharedModelVal private import codeql.util.Unit private import codeql.mad.static.MaD as SharedMaD -private module MaD = SharedMaD::ModelsAsData; - -import MaD - /** * A unit class for adding additional source model rows. * @@ -149,92 +145,80 @@ predicate sinkModel(string row) { any(SinkModelCsv s).row(row) } /** Holds if `row` is a summary model. */ predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) } -/** Holds if a source model exists for the given parameters. 
*/ -predicate sourceModel( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance, string model -) { - exists(string row | - sourceModel(row) and - row.splitAt(";", 0) = namespace and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = subtypes.toString() and - subtypes = [true, false] and - row.splitAt(";", 3) = name and - row.splitAt(";", 4) = signature and - row.splitAt(";", 5) = ext and - row.splitAt(";", 6) = output and - row.splitAt(";", 7) = kind - ) and - provenance = "manual" and - model = "" - or - exists(QlBuiltins::ExtensionId madId | - Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, - provenance, madId) and - model = "MaD:" + madId.toString() - ) -} +private module MadInput implements SharedMaD::InputSig { + /** Holds if a source model exists for the given parameters. */ + predicate additionalSourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, string model + ) { + exists(string row | + sourceModel(row) and + row.splitAt(";", 0) = namespace and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = subtypes.toString() and + subtypes = [true, false] and + row.splitAt(";", 3) = name and + row.splitAt(";", 4) = signature and + row.splitAt(";", 5) = ext and + row.splitAt(";", 6) = output and + row.splitAt(";", 7) = kind + ) and + provenance = "manual" and + model = "" + } -/** Holds if a sink model exists for the given parameters. 
*/ -predicate sinkModel( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance, string model -) { - exists(string row | - sinkModel(row) and - row.splitAt(";", 0) = namespace and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = subtypes.toString() and - subtypes = [true, false] and - row.splitAt(";", 3) = name and - row.splitAt(";", 4) = signature and - row.splitAt(";", 5) = ext and - row.splitAt(";", 6) = input and - row.splitAt(";", 7) = kind - ) and - provenance = "manual" and - model = "" - or - exists(QlBuiltins::ExtensionId madId | - Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, - madId) and - model = "MaD:" + madId.toString() - ) -} + /** Holds if a sink model exists for the given parameters. */ + predicate additionalSinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, string model + ) { + exists(string row | + sinkModel(row) and + row.splitAt(";", 0) = namespace and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = subtypes.toString() and + subtypes = [true, false] and + row.splitAt(";", 3) = name and + row.splitAt(";", 4) = signature and + row.splitAt(";", 5) = ext and + row.splitAt(";", 6) = input and + row.splitAt(";", 7) = kind + ) and + provenance = "manual" and + model = "" + } -/** - * Holds if a summary model exists for the given parameters. - * - * This predicate does not expand `@` to `*`s. 
- */ -private predicate summaryModel0( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance, string model -) { - exists(string row | - summaryModel(row) and - row.splitAt(";", 0) = namespace and - row.splitAt(";", 1) = type and - row.splitAt(";", 2) = subtypes.toString() and - subtypes = [true, false] and - row.splitAt(";", 3) = name and - row.splitAt(";", 4) = signature and - row.splitAt(";", 5) = ext and - row.splitAt(";", 6) = input and - row.splitAt(";", 7) = output and - row.splitAt(";", 8) = kind - ) and - provenance = "manual" and - model = "" - or - exists(QlBuiltins::ExtensionId madId | - Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) and - model = "MaD:" + madId.toString() - ) + /** + * Holds if a summary model exists for the given parameters. + * + * This predicate does not expand `@` to `*`s. + */ + predicate additionalSummaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, string model + ) { + exists(string row | + summaryModel(row) and + row.splitAt(";", 0) = namespace and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = subtypes.toString() and + subtypes = [true, false] and + row.splitAt(";", 3) = name and + row.splitAt(";", 4) = signature and + row.splitAt(";", 5) = ext and + row.splitAt(";", 6) = input and + row.splitAt(";", 7) = output and + row.splitAt(";", 8) = kind + ) and + provenance = "manual" and + model = "" + } } +private module MaD = SharedMaD::ModelsAsData; + +import MaD + /** * Holds if `input` is `input0`, but with all occurrences of `@` replaced * by `n` repetitions of `*` (and similarly for `output` and `output0`). 
@@ -256,7 +240,7 @@ predicate summaryModel( string input, string output, string kind, string provenance, string model ) { exists(string input0, string output0 | - summaryModel0(namespace, type, subtypes, name, signature, ext, input0, output0, kind, + MaD::summaryModel(namespace, type, subtypes, name, signature, ext, input0, output0, kind, provenance, model) and expandInputAndOutput(input0, input, output0, output, [0 .. Private::getMaxElementContentIndirectionIndex() - 1]) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll index 5b5d3f329dfe..a36bfc062970 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll @@ -88,7 +88,7 @@ */ import csharp -import ExternalFlowExtensions +private import ExternalFlowExtensions::Extensions as Extensions private import DataFlowDispatch private import DataFlowPrivate private import DataFlowPublic @@ -103,7 +103,9 @@ private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax private import codeql.mad.ModelValidation as SharedModelVal private import codeql.mad.static.MaD as SharedMaD -private module MaD = SharedMaD::ModelsAsData; +private module MadInput implements SharedMaD::InputSig { } + +private module MaD = SharedMaD::ModelsAsData; import MaD @@ -169,7 +171,7 @@ module ModelValidation { predicate sourceKind(string kind) { sourceModel(_, _, _, _, _, _, _, kind, _, _) } - predicate neutralKind(string kind) { neutralModel(_, _, _, _, kind, _) } + predicate neutralKind(string kind) { Extensions::neutralModel(_, _, _, _, kind, _) } } private module KindVal = SharedModelVal::KindValidation; @@ -186,7 +188,7 @@ module ModelValidation { summaryModel(namespace, type, _, name, signature, ext, _, _, _, provenance, _) and pred = "summary" or - neutralModel(namespace, type, name, signature, _, provenance) and + 
Extensions::neutralModel(namespace, type, name, signature, _, provenance) and ext = "" and pred = "neutral" | @@ -229,7 +231,7 @@ private predicate elementSpec( or summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _) or - neutralModel(namespace, type, name, signature, _, _) and ext = "" and subtypes = true + Extensions::neutralModel(namespace, type, name, signature, _, _) and ext = "" and subtypes = true } private predicate elementSpec( @@ -501,19 +503,17 @@ private predicate interpretSummary( UnboundCallable c, string input, string output, string kind, string provenance, string model ) { exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string namespace, string type, boolean subtypes, string name, string signature, string ext | summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance, - madId) and - model = "MaD:" + madId.toString() and + model) and c = interpretElement(namespace, type, subtypes, name, signature, ext) ) } predicate interpretNeutral(UnboundCallable c, string kind, string provenance) { exists(string namespace, string type, string name, string signature | - neutralModel(namespace, type, name, signature, kind, provenance) and + Extensions::neutralModel(namespace, type, name, signature, kind, provenance) and c = interpretElement(namespace, type, true, name, signature, "") ) } diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll index 842c28ac75b0..56278b9ef950 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll @@ -213,11 +213,9 @@ module SourceSinkInterpretationInput implements Element e, string output, string kind, Public::Provenance provenance, string model ) { exists( - string namespace, string 
type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string namespace, string type, boolean subtypes, string name, string signature, string ext | - sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, madId) and - model = "MaD:" + madId.toString() and + sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, model) and e = interpretElement(namespace, type, subtypes, name, signature, ext) ) } @@ -226,11 +224,9 @@ module SourceSinkInterpretationInput implements Element e, string input, string kind, Public::Provenance provenance, string model ) { exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string namespace, string type, boolean subtypes, string name, string signature, string ext | - sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, madId) and - model = "MaD:" + madId.toString() and + sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, model) and e = interpretElement(namespace, type, subtypes, name, signature, ext) ) } diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index 42b3f472a59a..dfa8f5bb3a2d 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -96,7 +96,9 @@ private import internal.FlowSummaryImpl::Private::External private import codeql.mad.ModelValidation as SharedModelVal private import codeql.mad.static.MaD as SharedMaD -private module MaD = SharedMaD::ModelsAsData; +private module MadInput implements SharedMaD::InputSig { } + +private module MaD = SharedMaD::ModelsAsData; import MaD diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index 786f76fc679e..108799aee160 100644 --- 
a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -102,7 +102,47 @@ private import internal.ExternalFlowExtensions::Extensions as Extensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.mad.static.MaD as SharedMaD -private module MaD = SharedMaD::ModelsAsData; +private module MadInput implements SharedMaD::InputSig { + /** Holds if a source model exists for the given parameters. */ + predicate additionalSourceModel( + string package, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + any(ActiveExperimentalModelsInternal q) + .sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, + madId) and + model = "MaD:" + madId.toString() + ) + } + + /** Holds if a sink model exists for the given parameters. */ + predicate additionalSinkModel( + string package, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + any(ActiveExperimentalModelsInternal q) + .sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, madId) and + model = "MaD:" + madId.toString() + ) + } + + /** Holds if a summary model exists for the given parameters. 
*/ + predicate additionalSummaryModel( + string package, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + any(ActiveExperimentalModelsInternal q) + .summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, + provenance, madId) and + model = "MaD:" + madId.toString() + ) + } +} + +private module MaD = SharedMaD::ModelsAsData; import MaD @@ -152,34 +192,6 @@ abstract private class ActiveExperimentalModelsInternal extends string { deprecated class ActiveExperimentalModels = ActiveExperimentalModelsInternal; -/** Holds if a source model exists for the given parameters. */ -predicate sourceModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - ( - Extensions::sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, - madId) - or - any(ActiveExperimentalModelsInternal q) - .sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, madId) - ) -} - -/** Holds if a sink model exists for the given parameters. */ -predicate sinkModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - ( - Extensions::sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, - madId) - or - any(ActiveExperimentalModelsInternal q) - .sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, madId) - ) -} - /** Holds if a barrier model exists for the given parameters. 
*/ predicate barrierModel( string package, string type, boolean subtypes, string name, string signature, string ext, @@ -198,21 +210,6 @@ predicate barrierGuardModel( acceptingvalue, kind, provenance, madId) } -/** Holds if a summary model exists for the given parameters. */ -predicate summaryModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - ( - Extensions::summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) - or - any(ActiveExperimentalModelsInternal q) - .summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) - ) -} - /** * Holds if the given extension tuple `madId` should pretty-print as `model`. * diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll index c712e1ae5fb6..4f5de01e3e35 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll @@ -228,11 +228,10 @@ module SourceSinkInterpretationInput implements ) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement baseSource, string originalOutput, QlBuiltins::ExtensionId madId + SourceOrSinkElement baseSource, string originalOutput | sourceModel(namespace, type, subtypes, name, signature, ext, originalOutput, kind, provenance, - madId) and - model = "MaD:" + madId.toString() and + model) and baseSource = interpretElement(namespace, type, subtypes, name, signature, ext, _) and ( e = baseSource and output = originalOutput @@ -247,11 +246,10 @@ module SourceSinkInterpretationInput implements ) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement 
baseSink, string originalInput, QlBuiltins::ExtensionId madId + SourceOrSinkElement baseSink, string originalInput | sinkModel(namespace, type, subtypes, name, signature, ext, originalInput, kind, provenance, - madId) and - model = "MaD:" + madId.toString() and + model) and baseSink = interpretElement(namespace, type, subtypes, name, signature, ext, _) and ( e = baseSink and originalInput = input @@ -384,12 +382,10 @@ module Private { ) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext, - string originalInput, string originalOutput, Callable baseCallable, - QlBuiltins::ExtensionId madId + string originalInput, string originalOutput, Callable baseCallable | summaryModel(namespace, type, subtypes, name, signature, ext, originalInput, originalOutput, - kind, provenance, madId) and - model = "MaD:" + madId.toString() and + kind, provenance, model) and baseCallable = interpretElement(namespace, type, subtypes, name, signature, ext, isExact) and ( c.asCallable() = baseCallable and input = originalInput and output = originalOutput diff --git a/shared/mad/codeql/mad/static/MaD.qll b/shared/mad/codeql/mad/static/MaD.qll index 6a8598cf6d9f..c466bd5487dc 100644 --- a/shared/mad/codeql/mad/static/MaD.qll +++ b/shared/mad/codeql/mad/static/MaD.qll @@ -51,7 +51,39 @@ signature module ExtensionsSig { ); } -module ModelsAsData { +signature module InputSig { + /** + * Holds if a source model exists for the given parameters. + */ + default predicate additionalSourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, string model + ) { + none() + } + + /** + * Holds if a sink model exists for the given parameters. 
+ */ + default predicate additionalSinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, string model + ) { + none() + } + + /** + * Holds if a summary model exists for the given parameters. + */ + default predicate additionalSummaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, string model + ) { + none() + } +} + +module ModelsAsData { /** * Holds if the given extension tuple `madId` should pretty-print as `model`. * @@ -122,10 +154,61 @@ module ModelsAsData { ) } + /** + * Holds if a source model exists for the given parameters. + */ + predicate sourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, + provenance, madId) and + model = "MaD:" + madId.toString() + ) + or + Input::additionalSourceModel(namespace, type, subtypes, name, signature, ext, output, kind, + provenance, model) + } + + /** + * Holds if a sink model exists for the given parameters. + */ + predicate sinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, + provenance, madId) and + model = "MaD:" + madId.toString() + ) + or + Input::additionalSinkModel(namespace, type, subtypes, name, signature, ext, input, kind, + provenance, model) + } + + /** + * Holds if a summary model exists for the given parameters. 
+ */ + predicate summaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, string model + ) { + exists(QlBuiltins::ExtensionId madId | + Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, + provenance, madId) and + model = "MaD:" + madId.toString() + ) + or + Input::additionalSummaryModel(namespace, type, subtypes, name, signature, ext, input, output, + kind, provenance, model) + } + private predicate relevantNamespace(string namespace) { - Extensions::sourceModel(namespace, _, _, _, _, _, _, _, _, _) or - Extensions::sinkModel(namespace, _, _, _, _, _, _, _, _, _) or - Extensions::summaryModel(namespace, _, _, _, _, _, _, _, _, _, _) + sourceModel(namespace, _, _, _, _, _, _, _, _, _) or + sinkModel(namespace, _, _, _, _, _, _, _, _, _) or + summaryModel(namespace, _, _, _, _, _, _, _, _, _, _) } private predicate namespaceLink(string shortns, string longns) { @@ -157,8 +240,7 @@ module ModelsAsData { strictcount(string subns, string type, boolean subtypes, string name, string signature, string ext, string output, string provenance | canonicalNamespaceLink(namespace, subns) and - Extensions::sourceModel(subns, type, subtypes, name, signature, ext, output, kind, - provenance, _) + sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance, _) ) or part = "sink" and @@ -166,8 +248,7 @@ module ModelsAsData { strictcount(string subns, string type, boolean subtypes, string name, string signature, string ext, string input, string provenance | canonicalNamespaceLink(namespace, subns) and - Extensions::sinkModel(subns, type, subtypes, name, signature, ext, input, kind, - provenance, _) + sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance, _) ) or part = "summary" and @@ -175,8 +256,8 @@ module ModelsAsData { strictcount(string subns, string type, boolean subtypes, 
string name, string signature, string ext, string input, string output, string provenance | canonicalNamespaceLink(namespace, subns) and - Extensions::summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, - provenance, _) + summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance, + _) ) ) } From e2624385571632c9bdcd600adeb7d58961cac821 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 10:14:41 +0100 Subject: [PATCH 07/10] C++: Use shared model coverage code. --- .../semmle/code/cpp/dataflow/ExternalFlow.qll | 58 +------------------ shared/mad/codeql/mad/static/MaD.qll | 5 +- 2 files changed, 6 insertions(+), 57 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index cf211b4397d8..08e4a073ddb9 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -213,6 +213,8 @@ private module MadInput implements SharedMaD::InputSig { provenance = "manual" and model = "" } + + string namespaceSegmentSeparator() { result = "::" } } private module MaD = SharedMaD::ModelsAsData; @@ -247,62 +249,6 @@ predicate summaryModel( ) } -private predicate relevantNamespace(string namespace) { - sourceModel(namespace, _, _, _, _, _, _, _, _, _) or - sinkModel(namespace, _, _, _, _, _, _, _, _, _) or - summaryModel(namespace, _, _, _, _, _, _, _, _, _, _) -} - -private predicate namespaceLink(string shortns, string longns) { - relevantNamespace(shortns) and - relevantNamespace(longns) and - longns.prefix(longns.indexOf("::")) = shortns -} - -private predicate canonicalNamespace(string namespace) { - relevantNamespace(namespace) and not namespaceLink(_, namespace) -} - -private predicate canonicalNamespaceLink(string namespace, string subns) { - canonicalNamespace(namespace) and - (subns = namespace or namespaceLink(namespace, subns)) -} - -/** - * Holds if 
MaD framework coverage of `namespace` is `n` api endpoints of the - * kind `(kind, part)`, and `namespaces` is the number of subnamespaces of - * `namespace` which have MaD framework coverage (including `namespace` - * itself). - */ -predicate modelCoverage(string namespace, int namespaces, string kind, string part, int n) { - namespaces = strictcount(string subns | canonicalNamespaceLink(namespace, subns)) and - ( - part = "source" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string output, string provenance, string model | - canonicalNamespaceLink(namespace, subns) and - sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance, model) - ) - or - part = "sink" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string input, string provenance, string model | - canonicalNamespaceLink(namespace, subns) and - sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance, model) - ) - or - part = "summary" and - n = - strictcount(string subns, string type, boolean subtypes, string name, string signature, - string ext, string input, string output, string provenance | - canonicalNamespaceLink(namespace, subns) and - summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance, _) - ) - ) -} - /** Provides a query predicate to check the CSV data for validation errors. */ module CsvValidation { private string getInvalidModelInput() { diff --git a/shared/mad/codeql/mad/static/MaD.qll b/shared/mad/codeql/mad/static/MaD.qll index c466bd5487dc..76d4fa484a62 100644 --- a/shared/mad/codeql/mad/static/MaD.qll +++ b/shared/mad/codeql/mad/static/MaD.qll @@ -81,6 +81,9 @@ signature module InputSig { ) { none() } + + /** Get the separator used between namespace segments. */ + default string namespaceSegmentSeparator() { result = "." 
} } module ModelsAsData { @@ -214,7 +217,7 @@ module ModelsAsData { private predicate namespaceLink(string shortns, string longns) { relevantNamespace(shortns) and relevantNamespace(longns) and - longns.prefix(longns.indexOf(".")) = shortns + longns.prefix(longns.indexOf(Input::namespaceSegmentSeparator())) = shortns } private predicate canonicalNamespace(string namespace) { From 5bddc8d2892fc5140d93e02ee90e7cd2bc56162e Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 10:35:39 +0100 Subject: [PATCH 08/10] Go: Move Go package-grouping support into shared lib. --- .../internal/ExternalFlowExtensions.qll | 2 + .../internal/ExternalFlowExtensions.qll | 2 + go/ql/lib/semmle/go/dataflow/ExternalFlow.qll | 80 ------------- .../go/dataflow/internal/FlowSummaryImpl.qll | 18 +-- .../code/java/dataflow/ExternalFlow.qll | 21 ---- .../internal/ExternalFlowExtensions.qll | 2 + .../dataflow/internal/FlowSummaryImpl.qll | 10 +- shared/mad/codeql/mad/static/MaD.qll | 107 ++++++++++++++---- 8 files changed, 104 insertions(+), 138 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll index 165970206203..d128feffc209 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll @@ -53,4 +53,6 @@ extensible predicate neutralModel( module Extensions implements SharedMaD::ExtensionsSig { import ExternalFlowExtensions + + predicate packageGrouping(string group, string package) { none() } } diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll index f845ddf79512..acbb651bcc4b 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll +++ 
b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlowExtensions.qll @@ -53,4 +53,6 @@ extensible predicate neutralModel( module Extensions implements SharedMaD::ExtensionsSig { import ExternalFlowExtensions + + predicate packageGrouping(string group, string package) { none() } } diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index dfa8f5bb3a2d..544d73fef26b 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -107,86 +107,6 @@ module FlowExtensions = Extensions; /** Gets the prefix for a group of packages. */ private string groupPrefix() { result = "group:" } -/** - * Gets a package represented by `packageOrGroup`. - * - * If `packageOrGroup` is of the form `group:` then `result` is a - * package in the group ``, as determined by `packageGrouping`. - * Otherwise, `result` is `packageOrGroup`. - */ -bindingset[packageOrGroup] -private string getPackage(string packageOrGroup) { - not exists(string group | packageOrGroup = groupPrefix() + group) and result = packageOrGroup - or - exists(string group | - FlowExtensions::packageGrouping(group, result) and - packageOrGroup = groupPrefix() + group - ) -} - -/** - * Holds if a source model exists for the given parameters. - * - * Note that `group:` references are expanded into one or more actual packages - * by this predicate. - */ -predicate sourceModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - exists(string packageOrGroup | - package = getPackage(packageOrGroup) and - FlowExtensions::sourceModel(packageOrGroup, type, subtypes, name, signature, ext, output, kind, - provenance, madId) - ) -} - -/** - * Holds if a sink model exists for the given parameters. 
- * - * Note that `group:` references are expanded into one or more actual packages - * by this predicate. - */ -predicate sinkModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - exists(string packageOrGroup | package = getPackage(packageOrGroup) | - FlowExtensions::sinkModel(packageOrGroup, type, subtypes, name, signature, ext, input, kind, - provenance, madId) - ) -} - -/** - * Holds if a summary model exists for the given parameters. - * - * Note that `group:` references are expanded into one or more actual packages - * by this predicate. - */ -predicate summaryModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - exists(string packageOrGroup | package = getPackage(packageOrGroup) | - FlowExtensions::summaryModel(packageOrGroup, type, subtypes, name, signature, ext, input, - output, kind, provenance, madId) - ) -} - -/** - * Holds if a neutral model exists for the given parameters. - * - * Note that `group:` references are expanded into one or more actual packages - * by this predicate. 
- */ -predicate neutralModel( - string package, string type, string name, string signature, string kind, string provenance -) { - exists(string packageOrGroup | package = getPackage(packageOrGroup) | - FlowExtensions::neutralModel(packageOrGroup, type, name, signature, kind, provenance) - ) -} - bindingset[p] private string cleanPackage(string p) { exists(string noPrefix | diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 41ded04634be..39af0cef1ad0 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -137,11 +137,9 @@ module SourceSinkInterpretationInput implements SourceOrSinkElement e, string output, string kind, Public::Provenance provenance, string model ) { exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string package, string type, boolean subtypes, string name, string signature, string ext | - sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, madId) and - model = "MaD:" + madId.toString() and + sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance, model) and e = interpretElement(package, type, subtypes, name, signature, ext) ) } @@ -154,11 +152,9 @@ module SourceSinkInterpretationInput implements SourceOrSinkElement e, string input, string kind, Public::Provenance provenance, string model ) { exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string package, string type, boolean subtypes, string name, string signature, string ext | - sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, madId) and - model = "MaD:" + madId.toString() and + sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, model) and e 
= interpretElement(package, type, subtypes, name, signature, ext) ) } @@ -504,12 +500,10 @@ module Private { string model ) { exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - QlBuiltins::ExtensionId madId + string namespace, string type, boolean subtypes, string name, string signature, string ext | summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) and - model = "MaD:" + madId.toString() and + provenance, model) and c.asFunction() = interpretElement(namespace, type, subtypes, name, signature, ext).asEntity() ) diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index 108799aee160..1f8ae76ed63b 100644 --- a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -192,24 +192,6 @@ abstract private class ActiveExperimentalModelsInternal extends string { deprecated class ActiveExperimentalModels = ActiveExperimentalModelsInternal; -/** Holds if a barrier model exists for the given parameters. */ -predicate barrierModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - Extensions::barrierModel(package, type, subtypes, name, signature, ext, output, kind, provenance, - madId) -} - -/** Holds if a barrier guard model exists for the given parameters. */ -predicate barrierGuardModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId -) { - Extensions::barrierGuardModel(package, type, subtypes, name, signature, ext, input, - acceptingvalue, kind, provenance, madId) -} - /** * Holds if the given extension tuple `madId` should pretty-print as `model`. 
* @@ -255,9 +237,6 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { ) } -/** Holds if a neutral model exists for the given parameters. */ -predicate neutralModel = Extensions::neutralModel/6; - /** Provides a query predicate to check the MaD models for validation errors. */ module ModelValidation { private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll b/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll index c01766c317a2..946872ab3847 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll @@ -98,4 +98,6 @@ extensible predicate experimentalSummaryModel( module Extensions implements SharedMaD::ExtensionsSig { import ExternalFlowExtensions + + predicate packageGrouping(string group, string package) { none() } } diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll index 4f5de01e3e35..b9d8f58cecbc 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll @@ -264,11 +264,10 @@ module SourceSinkInterpretationInput implements ) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement baseBarrier, string originalOutput, QlBuiltins::ExtensionId madId + SourceOrSinkElement baseBarrier, string originalOutput | barrierModel(namespace, type, subtypes, name, signature, ext, originalOutput, kind, - provenance, madId) and - model = "MaD:" + madId.toString() and + provenance, model) and baseBarrier = interpretElement(namespace, type, subtypes, name, signature, ext, _) and ( e = baseBarrier and output = originalOutput @@ -284,11 +283,10 @@ module 
SourceSinkInterpretationInput implements ) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement baseBarrier, string originalInput, QlBuiltins::ExtensionId madId + SourceOrSinkElement baseBarrier, string originalInput | barrierGuardModel(namespace, type, subtypes, name, signature, ext, originalInput, - acceptingvalue, kind, provenance, madId) and - model = "MaD:" + madId.toString() and + acceptingvalue, kind, provenance, model) and baseBarrier = interpretElement(namespace, type, subtypes, name, signature, ext, _) and ( e = baseBarrier and input = originalInput diff --git a/shared/mad/codeql/mad/static/MaD.qll b/shared/mad/codeql/mad/static/MaD.qll index 76d4fa484a62..fbd12d909aba 100644 --- a/shared/mad/codeql/mad/static/MaD.qll +++ b/shared/mad/codeql/mad/static/MaD.qll @@ -49,6 +49,11 @@ signature module ExtensionsSig { predicate neutralModel( string namespace, string type, string name, string signature, string kind, string provenance ); + + /** + * Holds if the package `package` is part of the group `group`. + */ + predicate packageGrouping(string group, string package); } signature module InputSig { @@ -157,6 +162,27 @@ module ModelsAsData { ) } + /** Gets the prefix for a group of packages/namespaces. */ + private string groupPrefix() { result = "group:" } + + /** + * Gets a package/namespace represented by `namespaceOrGroup`. + * + * If `namespaceOrGroup` is of the form `group:<groupname>` then `result` is a + * package/namespace in the group `<groupname>`, as determined by `packageGrouping`. + * Otherwise, `result` is `namespaceOrGroup`. 
+ */ + bindingset[namespaceOrGroup] + private string getNamespace(string namespaceOrGroup) { + not exists(string group | namespaceOrGroup = groupPrefix() + group) and + result = namespaceOrGroup + or + exists(string group | + Extensions::packageGrouping(group, result) and + namespaceOrGroup = groupPrefix() + group + ) + } + /** * Holds if a source model exists for the given parameters. */ @@ -164,14 +190,16 @@ module ModelsAsData { string namespace, string type, boolean subtypes, string name, string signature, string ext, string output, string kind, string provenance, string model ) { - exists(QlBuiltins::ExtensionId madId | - Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, - provenance, madId) and - model = "MaD:" + madId.toString() + exists(string namespaceOrGroup | namespace = getNamespace(namespaceOrGroup) | + exists(QlBuiltins::ExtensionId madId | + Extensions::sourceModel(namespaceOrGroup, type, subtypes, name, signature, ext, output, + kind, provenance, madId) and + model = "MaD:" + madId.toString() + ) + or + Input::additionalSourceModel(namespaceOrGroup, type, subtypes, name, signature, ext, output, + kind, provenance, model) ) - or - Input::additionalSourceModel(namespace, type, subtypes, name, signature, ext, output, kind, - provenance, model) } /** @@ -181,14 +209,42 @@ module ModelsAsData { string namespace, string type, boolean subtypes, string name, string signature, string ext, string input, string kind, string provenance, string model ) { - exists(QlBuiltins::ExtensionId madId | - Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, + exists(string namespaceOrGroup | namespace = getNamespace(namespaceOrGroup) | + exists(QlBuiltins::ExtensionId madId | + Extensions::sinkModel(namespaceOrGroup, type, subtypes, name, signature, ext, input, kind, + provenance, madId) and + model = "MaD:" + madId.toString() + ) + or + Input::additionalSinkModel(namespaceOrGroup, type, subtypes, name, 
signature, ext, input, + kind, provenance, model) + ) + } + + /** Holds if a barrier model exists for the given parameters. */ + predicate barrierModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, string model + ) { + exists(string namespaceOrGroup, QlBuiltins::ExtensionId madId | + namespace = getNamespace(namespaceOrGroup) and + Extensions::barrierModel(namespaceOrGroup, type, subtypes, name, signature, ext, output, kind, provenance, madId) and model = "MaD:" + madId.toString() ) - or - Input::additionalSinkModel(namespace, type, subtypes, name, signature, ext, input, kind, - provenance, model) + } + + /** Holds if a barrier guard model exists for the given parameters. */ + predicate barrierGuardModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string acceptingvalue, string kind, string provenance, string model + ) { + exists(string namespaceOrGroup, QlBuiltins::ExtensionId madId | + namespace = getNamespace(namespaceOrGroup) and + Extensions::barrierGuardModel(namespaceOrGroup, type, subtypes, name, signature, ext, input, + acceptingvalue, kind, provenance, madId) and + model = "MaD:" + madId.toString() + ) } /** @@ -198,14 +254,27 @@ module ModelsAsData { string namespace, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, string model ) { - exists(QlBuiltins::ExtensionId madId | - Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, - provenance, madId) and - model = "MaD:" + madId.toString() + exists(string namespaceOrGroup | namespace = getNamespace(namespaceOrGroup) | + exists(QlBuiltins::ExtensionId madId | + Extensions::summaryModel(namespaceOrGroup, type, subtypes, name, signature, ext, input, + output, kind, provenance, madId) and + model = "MaD:" + madId.toString() 
+ ) + or + Input::additionalSummaryModel(namespaceOrGroup, type, subtypes, name, signature, ext, input, + output, kind, provenance, model) + ) + } + + /** + * Holds if a neutral model exists for the given parameters. + */ + predicate neutralModel( + string namespace, string type, string name, string signature, string kind, string provenance + ) { + exists(string namespaceOrGroup | namespace = getNamespace(namespaceOrGroup) | + Extensions::neutralModel(namespaceOrGroup, type, name, signature, kind, provenance) ) - or - Input::additionalSummaryModel(namespace, type, subtypes, name, signature, ext, input, output, - kind, provenance, model) } private predicate relevantNamespace(string namespace) { From 4b2e8c0b57dda61f7e6c76f93ff1f12539968061 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 11:44:50 +0100 Subject: [PATCH 09/10] C++/C#/Go: Add empty extensible data. --- cpp/ql/lib/ext/empty.model.yml | 8 ++++++++ csharp/ql/lib/ext/empty.model.yml | 10 ++++++++++ go/ql/lib/ext/empty.model.yml | 8 ++++++++ 3 files changed, 26 insertions(+) diff --git a/cpp/ql/lib/ext/empty.model.yml b/cpp/ql/lib/ext/empty.model.yml index 6f160b62d7a6..e5202b5ad73c 100644 --- a/cpp/ql/lib/ext/empty.model.yml +++ b/cpp/ql/lib/ext/empty.model.yml @@ -9,6 +9,14 @@ extensions: pack: codeql/cpp-all extensible: sinkModel data: [] + - addsTo: + pack: codeql/cpp-all + extensible: barrierModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: barrierGuardModel + data: [] - addsTo: pack: codeql/cpp-all extensible: summaryModel diff --git a/csharp/ql/lib/ext/empty.model.yml b/csharp/ql/lib/ext/empty.model.yml index 6b38b783cbe2..09d848ea57d5 100644 --- a/csharp/ql/lib/ext/empty.model.yml +++ b/csharp/ql/lib/ext/empty.model.yml @@ -11,6 +11,16 @@ extensions: extensible: sinkModel data: [] + - addsTo: + pack: codeql/csharp-all + extensible: barrierModel + data: [] + + - addsTo: + pack: codeql/csharp-all + extensible: barrierGuardModel + data: [] + - addsTo: pack: 
codeql/csharp-all extensible: summaryModel diff --git a/go/ql/lib/ext/empty.model.yml b/go/ql/lib/ext/empty.model.yml index 8d661a9f1db6..1709a6098eb5 100644 --- a/go/ql/lib/ext/empty.model.yml +++ b/go/ql/lib/ext/empty.model.yml @@ -9,6 +9,14 @@ extensions: pack: codeql/go-all extensible: sinkModel data: [] + - addsTo: + pack: codeql/go-all + extensible: barrierModel + data: [] + - addsTo: + pack: codeql/go-all + extensible: barrierGuardModel + data: [] - addsTo: pack: codeql/go-all extensible: summaryModel From 10a0530737ce818c88c0bdce5e45e42949a5caa8 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 11 Dec 2025 14:23:21 +0100 Subject: [PATCH 10/10] Go: Add support for MaD barriers and barrier guards. --- go/ql/lib/semmle/go/dataflow/ExternalFlow.qll | 58 +++++++++++++++ .../go/dataflow/internal/DataFlowUtil.qll | 70 +++++++++++++++---- .../go/dataflow/internal/FlowSummaryImpl.qll | 19 +++-- 3 files changed, 128 insertions(+), 19 deletions(-) diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index 544d73fef26b..aa7fed7bcbde 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -320,6 +320,10 @@ private predicate elementSpec( or sinkModel(package, type, subtypes, name, signature, ext, _, _, _, _) or + barrierModel(package, type, subtypes, name, signature, ext, _, _, _, _) + or + barrierGuardModel(package, type, subtypes, name, signature, ext, _, _, _, _, _) + or summaryModel(package, type, subtypes, name, signature, ext, _, _, _, _, _) or neutralModel(package, type, name, signature, _, _) and ext = "" and subtypes = false @@ -455,6 +459,54 @@ private module Cached { isSinkNode(n, kind, model) and n.asNode() = node ) } + + private newtype TKindModelPair = + TMkPair(string kind, string model) { isBarrierGuardNode(_, _, kind, model) } + + private boolean convertAcceptingValue(Public::AcceptingValue av) { + av.isTrue() and result = true + 
or + av.isFalse() and result = false + // Remaining cases are not supported yet, they depend on the shared Guards library. + // or + // av.isNoException() and result.getDualValue().isThrowsException() + // or + // av.isZero() and result.asIntValue() = 0 + // or + // av.isNotZero() and result.getDualValue().asIntValue() = 0 + // or + // av.isNull() and result.isNullValue() + // or + // av.isNotNull() and result.isNonNullValue() + } + + private predicate barrierGuardChecks(DataFlow::Node g, Expr e, boolean gv, TKindModelPair kmp) { + exists( + SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingvalue, + string kind, string model + | + isBarrierGuardNode(n, acceptingvalue, kind, model) and + n.asNode().asExpr() = e and + kmp = TMkPair(kind, model) and + gv = convertAcceptingValue(acceptingvalue) + | + g.asExpr().(CallExpr).getAnArgument() = e // TODO: qualifier? + ) + } + + /** + * Holds if `node` is specified as a barrier with the given kind in a MaD flow + * model. + */ + cached + predicate barrierNode(DataFlow::Node node, string kind, string model) { + exists(SourceSinkInterpretationInput::InterpretNode n | + isBarrierNode(n, kind, model) and n.asNode() = node + ) + or + DataFlow::ParameterizedBarrierGuard<TKindModelPair, barrierGuardChecks/4>::getABarrierNode(TMkPair(kind, + model)) = node + } } import Cached @@ -471,6 +523,12 @@ predicate sourceNode(DataFlow::Node node, string kind) { sourceNode(node, kind, */ predicate sinkNode(DataFlow::Node node, string kind) { sinkNode(node, kind, _) } +/** + * Holds if `node` is specified as a barrier with the given kind in a MaD flow + * model. 
+ */ +predicate barrierNode(DataFlow::Node node, string kind) { barrierNode(node, kind, _) } + // adapter class for converting Mad summaries to `SummarizedCallable`s private class SummarizedCallableAdapter extends Public::SummarizedCallable { SummarizedCallableAdapter() { summaryElement(this, _, _, _, _, _) } diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index 14ff455646c9..404eca4b4a25 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -339,6 +339,20 @@ class ContentSet instanceof TContentSet { */ signature predicate guardChecksSig(Node g, Expr e, boolean branch); +bindingset[this] +private signature class ParamSig; + +private module WithParam<ParamSig P> { + /** + * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`. + * + * The expression `e` is expected to be a syntactic part of the guard `g`. + * For example, the guard `g` might be a call `isSafe(x)` and the expression `e` + * the argument `x`. + */ + signature predicate guardChecksSig(Node g, Expr e, boolean branch, P param); +} + /** * Provides a set of barrier nodes for a guard that validates an expression. * @@ -346,12 +360,36 @@ signature predicate guardChecksSig(Node g, Expr e, boolean branch); * in data flow and taint tracking. */ module BarrierGuard<guardChecksSig/3 guardChecks> { + private predicate guardChecks(Node g, Expr e, boolean branch, Unit param) { + guardChecks(g, e, branch) and exists(param) + } + + private module B = ParameterizedBarrierGuard<Unit, guardChecks/4>; + + /** Gets a node that is safely guarded by the given guard check. */ + Node getABarrierNode() { result = B::getABarrierNode(_) } + + /** + * Gets a node that is safely guarded by the given guard check. + */ + Node getABarrierNodeForGuard(Node guardCheck) { + result = B::getABarrierNodeForGuard(guardCheck, _) + } +} + +/** + * Provides a set of barrier nodes for a guard that validates an expression. 
+ * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ +module ParameterizedBarrierGuard<ParamSig P, WithParam<P>::guardChecksSig/4 guardChecks> { /** Gets a node that is safely guarded by the given guard check. */ - Node getABarrierNode() { + Node getABarrierNode(P param) { exists(ControlFlow::ConditionGuardNode guard, SsaWithFields var | result = pragma[only_bind_out](var).getAUse() | - guards(_, guard, _, var) and + guards(_, guard, _, var, param) and pragma[only_bind_out](guard).dominates(result.getBasicBlock()) ) } @@ -359,9 +397,9 @@ module BarrierGuard { /** * Gets a node that is safely guarded by the given guard check. */ - Node getABarrierNodeForGuard(Node guardCheck) { + Node getABarrierNodeForGuard(Node guardCheck, P param) { exists(ControlFlow::ConditionGuardNode guard, SsaWithFields var | result = var.getAUse() | - guards(guardCheck, guard, _, var) and + guards(guardCheck, guard, _, var, param) and guard.dominates(result.getBasicBlock()) ) } @@ -373,22 +411,24 @@ module BarrierGuard { * This predicate exists to enforce a good join order in `getAGuardedNode`. */ pragma[noinline] - private predicate guards(Node g, ControlFlow::ConditionGuardNode guard, Node nd, SsaWithFields ap) { - guards(g, guard, nd) and nd = ap.getAUse() + private predicate guards( + Node g, ControlFlow::ConditionGuardNode guard, Node nd, SsaWithFields ap, P param + ) { + guards(g, guard, nd, param) and nd = ap.getAUse() } /** * Holds if `guard` marks a point in the control-flow graph where `g` * is known to validate `nd`. 
*/ - private predicate guards(Node g, ControlFlow::ConditionGuardNode guard, Node nd) { + private predicate guards(Node g, ControlFlow::ConditionGuardNode guard, Node nd, P param) { exists(boolean branch | - guardChecks(g, nd.asExpr(), branch) and + guardChecks(g, nd.asExpr(), branch, param) and guard.ensures(g, branch) ) or exists(DataFlow::Property p, Node resNode, Node check, boolean outcome | - guardingCall(g, _, _, _, p, _, nd, resNode) and + guardingCall(g, _, _, _, p, _, nd, resNode, param) and p.checkOn(check, outcome, resNode) and guard.ensures(pragma[only_bind_into](check), outcome) ) @@ -405,9 +445,9 @@ module BarrierGuard { pragma[noinline] private predicate guardingCall( Node g, Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p, CallNode c, - Node nd, Node resNode + Node nd, Node resNode, P param ) { - guardingFunction(g, f, inp, outp, p) and + guardingFunction(g, f, inp, outp, p, param) and c = f.getACall() and nd = getInputNode(inp, c) and localFlow(getOutputNode(outp, c), resNode) @@ -438,7 +478,7 @@ module BarrierGuard { * `false`, `nil` or a non-`nil` value.) 
*/ private predicate guardingFunction( - Node g, Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p + Node g, Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p, P param ) { exists(FuncDecl fd, Node arg, Node ret | fd.getFunction() = f and @@ -446,7 +486,7 @@ module BarrierGuard { ( // Case: a function like "if someBarrierGuard(arg) { return true } else { return false }" exists(ControlFlow::ConditionGuardNode guard | - guards(g, pragma[only_bind_out](guard), arg) and + guards(g, pragma[only_bind_out](guard), arg, param) and guard.dominates(pragma[only_bind_out](ret).getBasicBlock()) | onlyPossibleReturnSatisfyingProperty(fd, outp, ret, p) @@ -456,7 +496,7 @@ module BarrierGuard { // or "return !someBarrierGuard(arg) && otherCond(...)" exists(boolean outcome | ret = getUniqueOutputNode(fd, outp) and - guardChecks(g, arg.asExpr(), outcome) and + guardChecks(g, arg.asExpr(), outcome, param) and // This predicate's contract is (p holds of ret ==> arg is checked), // (and we have (this has outcome ==> arg is checked)) // but p.checkOn(ret, outcome, this) gives us (ret has outcome ==> p holds of this), @@ -471,7 +511,7 @@ module BarrierGuard { DataFlow::Property outpProp | ret = getUniqueOutputNode(fd, outp) and - guardingFunction(g, f2, inp2, outp2, outpProp) and + guardingFunction(g, f2, inp2, outp2, outpProp, param) and c = f2.getACall() and arg = inp2.getNode(c) and ( diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 39af0cef1ad0..c40f589c2f03 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -160,16 +160,27 @@ module SourceSinkInterpretationInput implements } predicate barrierElement( - Element n, string output, string kind, Public::Provenance provenance, string model + Element e, string output, string kind, Public::Provenance provenance, string model 
) { - none() + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + barrierModel(package, type, subtypes, name, signature, ext, output, kind, provenance, model) and + e = interpretElement(package, type, subtypes, name, signature, ext) + ) } predicate barrierGuardElement( - Element n, string input, Public::AcceptingValue acceptingvalue, string kind, + Element e, string input, Public::AcceptingValue acceptingvalue, string kind, Public::Provenance provenance, string model ) { - none() + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingvalue, kind, + provenance, model) and + e = interpretElement(package, type, subtypes, name, signature, ext) + ) } // Note that due to embedding, which is currently implemented via some