Minimum confidence filter (#1457)

* Minimum confidence filter Signed-off-by: Prabhu Subramanian <[email protected]> * technique filter Signed-off-by: Prabhu Subramanian <[email protected]> --------- Signed-off-by: Prabhu Subramanian <[email protected]>
CycloneDX · Nov 15, 2024 · ce64722 · ce64722
1 parent 8c4c8ec
commit ce64722
Show file tree

Hide file tree

Showing 7 changed files with 139 additions and 1 deletion.
diff --git a/.github/workflows/repotests.yml b/.github/workflows/repotests.yml
@@ -415,6 +415,8 @@ jobs:
       - name: repotests openpbs
         run: |
           bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json
+          bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json --min-confidence 0.4
+          bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json --technique manifest-analysis
         shell: bash
       - name: repotests Jackalope
         run: |

diff --git a/README.md b/README.md
@@ -158,6 +158,11 @@ Options:
                                luated against or attested to.
   [array] [choices: "asvs-4.0.3", "bsimm-v13", "masvs-2.0.0", "nist_ssdf-1.1", "pcissc-secure-slc-1.1", "scvs-1.0.0", "s
                                                                                                      saf-DRAFT-2023-11"]
+      --min-confidence         Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% con
+                               fidence.                                                            [number] [default: 0]
+      --technique              Analysis technique to use
+  [array] [choices: "auto", "source-code-analysis", "binary-analysis", "manifest-analysis", "hash-comparison", "instrume
+                                                                                                   ntation", "filename"]
       --auto-compositions      Automatically set compositions when the BOM was filtered. Defaults to true
                                                                                                [boolean] [default: true]
   -h, --help                   Show help                                                                       [boolean]

diff --git a/bin/cdxgen.js b/bin/cdxgen.js
@@ -297,6 +297,24 @@ const args = yargs(hideBin(process.argv))
     hidden: true,
     choices: ["safe-pip-install", "suggest-build-tools"],
   })
+  .option("min-confidence", {
+    description:
+      "Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% confidence.",
+    default: 0,
+    type: "number",
+  })
+  .option("technique", {
+    description: "Analysis technique to use",
+    choices: [
+      "auto",
+      "source-code-analysis",
+      "binary-analysis",
+      "manifest-analysis",
+      "hash-comparison",
+      "instrumentation",
+      "filename",
+    ],
+  })
   .completion("completion", "Generate bash/zsh completion")
   .array("type")
   .array("excludeType")
@@ -306,6 +324,7 @@ const args = yargs(hideBin(process.argv))
   .array("exclude")
   .array("standard")
   .array("feature-flags")
+  .array("technique")
   .option("auto-compositions", {
     type: "boolean",
     default: true,

diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md
@@ -93,6 +93,44 @@ Use `--only` to include only those components containing the string in the purl.
 cdxgen -t java -o /tmp/bom.json -p --only org.springframework
 ```
 
+### Minimum confidence filter
+
+Use `--min-confidence` with a value between 0 and 1 to filter components based on the confidence of their purl [identity](https://cyclonedx.org/docs/1.6/json/#components_items_evidence_identity_oneOf_i0_items_field). The logic involves looking for `field=purl` in `evidence.identity` and collecting the maximum `confidence` value. This is then compared against the minimum confidence passed as an argument.
+
+```shell
+cdxgen -t c . --min-confidence 0.1
+```
+
+The above would filter out all the zero-confidence components in C/C++ projects, so use it with caution.
+
+### Analysis technique filter
+
+Use `--technique` to list the techniques that cdxgen is allowed to use for the xBOM generation. Omitting this argument or using the value `auto` enables the default behaviour.
+
+Example 1 - only allow manifest-analysis:
+
+```shell
+cdxgen -t c . --technique manifest-analysis
+```
+
+Example 2 - allow manifest-analysis and source-code-analysis:
+
+```shell
+cdxgen -t c . --technique manifest-analysis --technique source-code-analysis
+```
+
+List of supported techniques:
+
+- auto (default)
+- source-code-analysis
+- binary-analysis
+- manifest-analysis
+- hash-comparison
+- instrumentation
+- filename
+
+Currently, this capability is implemented as a filter during post-processing, so it is unlikely to yield any performance benefits.
+
 ## Automatic compositions
 
 When using any filters, cdxgen would automatically set the [compositions.aggregate](https://cyclonedx.org/docs/1.5/json/#compositions_items_aggregate) property to "incomplete" or "incomplete_first_party_only".

diff --git a/docs/CLI.md b/docs/CLI.md
@@ -119,6 +119,11 @@ Options:
                                luated against or attested to.
   [array] [choices: "asvs-4.0.3", "bsimm-v13", "masvs-2.0.0", "nist_ssdf-1.1", "pcissc-secure-slc-1.1", "scvs-1.0.0", "s
                                                                                                      saf-DRAFT-2023-11"]
+      --min-confidence         Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% con
+                               fidence.                                                            [number] [default: 0]
+      --technique              Analysis technique to use
+  [array] [choices: "auto", "source-code-analysis", "binary-analysis", "manifest-analysis", "hash-comparison", "instrume
+                                                                                                   ntation", "filename"]
       --auto-compositions      Automatically set compositions when the BOM was filtered. Defaults to true
                                                                                                [boolean] [default: true]
   -h, --help                   Show help                                                                       [boolean]

diff --git a/lib/stages/postgen/postgen.js b/lib/stages/postgen/postgen.js
@@ -127,6 +127,50 @@ export function applyStandards(bomJson, options) {
   return bomJson;
 }
 
/**
 * Method to get the purl identity confidence.
 *
 * Looks at the `evidence.identity` entries whose `field` is "purl" and
 * returns the largest `confidence` found among them. An entry without a
 * usable confidence value counts as 0.
 *
 * @param {Object} comp Component
 * @returns {undefined|number} Max of all the available purl identity confidence,
 *   or undefined when the component has no evidence or no purl identity entry
 */
function getIdentityConfidence(comp) {
  if (!comp?.evidence) {
    return undefined;
  }
  const identity = comp.evidence.identity;
  // A non-array identity is wrapped into a one-element list rather than being
  // iterated directly, which would throw for a plain object.
  // NOTE(review): the CycloneDX schema appears to allow both forms — confirm.
  let identities = [];
  if (Array.isArray(identity)) {
    identities = identity;
  } else if (identity) {
    identities = [identity];
  }
  let confidence;
  for (const aidentity of identities) {
    if (aidentity?.field !== "purl") {
      continue;
    }
    // Normalise before comparing: Math.max(undefined, x) is NaN, and a NaN
    // result would make every "less than minimum" comparison false.
    const current = aidentity.confidence || 0;
    confidence =
      confidence === undefined ? current : Math.max(current, confidence);
  }
  return confidence;
}
+
/**
 * Method to get the list of techniques used for identity.
 *
 * Collects `methods[].technique` from every `evidence.identity` entry whose
 * `field` is "purl". Methods that carry no technique are ignored.
 *
 * @param {Object} comp Component
 * @returns {Set|undefined} Set of technique. evidence.identity.methods.technique.
 *   Undefined when the component has no evidence; an empty Set when evidence
 *   exists but no purl identity technique was found.
 */
function getIdentityTechniques(comp) {
  if (!comp?.evidence) {
    return undefined;
  }
  const identity = comp.evidence.identity;
  // A non-array identity is wrapped into a one-element list rather than being
  // iterated directly, which would throw for a plain object.
  // NOTE(review): the CycloneDX schema appears to allow both forms — confirm.
  let identities = [];
  if (Array.isArray(identity)) {
    identities = identity;
  } else if (identity) {
    identities = [identity];
  }
  const techniques = new Set();
  for (const aidentity of identities) {
    if (aidentity?.field !== "purl") {
      continue;
    }
    for (const amethod of aidentity.methods || []) {
      // Skip methods without a technique so the set never holds undefined.
      if (amethod?.technique) {
        techniques.add(amethod.technique);
      }
    }
  }
  return techniques;
}
+
 /**
  * Filter BOM based on options
  *
@@ -143,6 +187,31 @@ export function filterBom(bomJson, options) {
     return bomJson;
   }
   for (const comp of bomJson.components) {
+    // minimum confidence filter
+    if (options?.minConfidence > 0) {
+      const confidence = Math.min(options.minConfidence, 1);
+      const identityConfidence = getIdentityConfidence(comp);
+      if (identityConfidence !== undefined && identityConfidence < confidence) {
+        filtered = true;
+        continue;
+      }
+    }
+    // identity technique filter
+    if (options?.technique?.length && !options.technique.includes("auto")) {
+      const allowedTechniques = new Set(
+        Array.isArray(options.technique)
+          ? options.technique
+          : [options.technique],
+      );
+      const usedTechniques = getIdentityTechniques(comp);
+      if (
+        usedTechniques &&
+        !usedTechniques.intersection(allowedTechniques).size
+      ) {
+        filtered = true;
+        continue;
+      }
+    }
     if (
       options.requiredOnly &&
       comp.scope &&

diff --git a/types/lib/stages/postgen/postgen.d.ts.map b/types/lib/stages/postgen/postgen.d.ts.map