From a45feacb87e8ee272dfac4543baf6be66a5c9844 Mon Sep 17 00:00:00 2001 From: prabhu Date: Wed, 13 Nov 2024 14:50:02 +0000 Subject: [PATCH] Annotation improvements - part 2 (#1451) * Adds ml-tiny profile to reduce the size further. Improved stemming. Signed-off-by: Prabhu Subramanian * obom tagging Signed-off-by: Prabhu Subramanian --------- Signed-off-by: Prabhu Subramanian --- README.md | 4 +- bin/cdxgen.js | 20 +- bin/repl.js | 2 +- ci/Dockerfile | 2 +- ci/base-images/cdxgen/Dockerfile.python | 2 +- ci/base-images/sle/Dockerfile.lang | 2 +- data/component-tags.json | 455 +++++++++++++------- docs/CLI.md | 19 +- lib/evinser/evinser.js | 17 +- lib/helpers/display.js | 7 +- lib/stages/postgen/annotator.js | 147 +++++-- lib/stages/postgen/annotator.test.js | 9 +- lib/stages/postgen/postgen.js | 33 +- types/lib/evinser/evinser.d.ts.map | 2 +- types/lib/helpers/display.d.ts.map | 2 +- types/lib/stages/postgen/annotator.d.ts | 13 +- types/lib/stages/postgen/annotator.d.ts.map | 2 +- types/lib/stages/postgen/postgen.d.ts.map | 2 +- 18 files changed, 513 insertions(+), 227 deletions(-) diff --git a/README.md b/README.md index 38b1ea709..30559a578 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ docker run --rm -e CDXGEN_DEBUG_MODE=debug -v /tmp:/tmp -v $(pwd):/app:rw -t ghc In deno applications, cdxgen could be directly imported without any conversion. Please see the section on [integration as a library](#integration-as-library) ```ts -import { createBom, submitBom } from "npm:@cyclonedx/cdxgen@^10.9.6"; +import { createBom, submitBom } from "npm:@cyclonedx/cdxgen@^11.0.0"; ``` ## Getting Help @@ -148,7 +148,7 @@ Options: and claim authorship. [array] [default: "OWASP Foundation"] --profile BOM profile to use for generation. Default generic. [choices: "appsec", "research", "operational", "threat-modeling", "license-compliance", "generic", "machine-learning", - "ml", "deep-learning", "ml-deep"] [default: "generic"] + "ml", "deep-learning", "ml-deep", "ml-tiny"] [default: "generic"] --exclude Additional glob pattern(s) to ignore [array] --include-formulation Generate formulation section with git metadata and build tools. Defaults to false. [boolean] [default: false] diff --git a/bin/cdxgen.js b/bin/cdxgen.js index 33d8f821c..f2ed93f53 100755 --- a/bin/cdxgen.js +++ b/bin/cdxgen.js @@ -239,6 +239,7 @@ const args = yargs(hideBin(process.argv)) "ml", "deep-learning", "ml-deep", + "ml-tiny", ], }) .option("lifecycle", { @@ -317,6 +318,14 @@ const args = yargs(hideBin(process.argv)) "$0 -t java -t js .", "Generate a SBOM for Java and JavaScript in the current directory", ], + [ + "$0 -t java --profile ml .", + "Generate a Java SBOM for machine learning purposes.", + ], + [ + "$0 -t python --profile research .", + "Generate a Python SBOM for appsec research.", + ], ["$0 --server", "Run cdxgen as a server"], ]) .epilogue("for documentation, visit https://cyclonedx.github.io/cdxgen") @@ -418,6 +427,13 @@ const applyAdvancedOptions = (options) => { case "license-compliance": process.env.FETCH_LICENSE = "true"; break; + case "ml-tiny": + process.env.FETCH_LICENSE = "true"; + options.deep = false; + options.evidence = false; + options.includeCrypto = false; + options.installDeps = false; + break; case "machine-learning": case "ml": process.env.FETCH_LICENSE = "true"; @@ -705,8 +721,10 @@ const checkPermissions = (filePath) => { usagesSlicesFile: options.usagesSlicesFile, dataFlowSlicesFile: options.dataFlowSlicesFile, reachablesSlicesFile: options.reachablesSlicesFile, + semanticsSlicesFile: options.semanticsSlicesFile, includeCrypto: options.includeCrypto, specVersion: options.specVersion, + profile: options.profile, }; const dbObjMap = await evinserModule.prepareDB(evinseOptions); if (dbObjMap) { @@ -719,8 +737,6 @@ const checkPermissions = (filePath) => { evinseOptions, ); bomNSData.bomJson = evinseJson; - // Redo post processing with evinse data - bomNSData = postProcess(bomNSData, options); if (options.print && evinseJson) { printOccurrences(evinseJson); printCallStack(evinseJson); diff --git a/bin/repl.js b/bin/repl.js index 618912889..550c4bd44 100755 --- a/bin/repl.js +++ b/bin/repl.js @@ -161,7 +161,7 @@ cdxgenRepl.defineCommand("search", { let dependenciesSearchStr = searchStr; if (!searchStr.includes("~>")) { dependenciesSearchStr = `dependencies[ref ~> /${searchStr}/i or dependsOn ~> /${searchStr}/i or provides ~> /${searchStr}/i]`; - searchStr = `components[group ~> /${searchStr}/i or name ~> /${searchStr}/i or description ~> /${searchStr}/i or publisher ~> /${searchStr}/i or purl ~> /${searchStr}/i]`; + searchStr = `components[group ~> /${searchStr}/i or name ~> /${searchStr}/i or description ~> /${searchStr}/i or publisher ~> /${searchStr}/i or purl ~> /${searchStr}/i or tags ~> /${searchStr}/i]`; } const expression = jsonata(searchStr); let components = await expression.evaluate(sbom); diff --git a/ci/Dockerfile b/ci/Dockerfile index d646709cd..2e7b6658d 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -21,7 +21,7 @@ ARG SBT_VERSION=1.10.2 ARG MAVEN_VERSION=3.9.9 ARG GRADLE_VERSION=8.10 ARG GO_VERSION=1.23.1 -ARG NODE_VERSION=23.1.0 +ARG NODE_VERSION=23.2.0 ARG PYTHON_VERSION=3.12 ENV GOPATH=/opt/app-root/go \ diff --git a/ci/base-images/cdxgen/Dockerfile.python b/ci/base-images/cdxgen/Dockerfile.python index cc1dcee69..c86ca9bc8 100644 --- a/ci/base-images/cdxgen/Dockerfile.python +++ b/ci/base-images/cdxgen/Dockerfile.python @@ -11,7 +11,7 @@ LABEL maintainer="CycloneDX" \ org.opencontainers.image.description="Rolling image with cdxgen SBOM generator for Python 3.12 apps" \ org.opencontainers.docker.cmd="docker run --rm -v /tmp:/tmp -p 9090:9090 -v $(pwd):/app:rw -t ghcr.io/cyclonedx/cdxgen-python:v10 -r /app --server" -ARG NODE_VERSION=23.1.0 +ARG NODE_VERSION=23.2.0 ENV NVM_DIR="/root/.nvm" \ PYTHON_CMD=python3 \ diff --git a/ci/base-images/sle/Dockerfile.lang b/ci/base-images/sle/Dockerfile.lang index 9df2442ef..1c216c90c 100644 --- a/ci/base-images/sle/Dockerfile.lang +++ b/ci/base-images/sle/Dockerfile.lang @@ -3,7 +3,7 @@ FROM registry.suse.com/bci/python:3.12 ARG JAVA_VERSION=23-tem ARG MAVEN_VERSION=3.9.9 ARG GCC_VERSION=13 -ARG NODE_VERSION=23.1.0 +ARG NODE_VERSION=23.2.0 ENV JAVA_VERSION=$JAVA_VERSION \ MAVEN_VERSION=$MAVEN_VERSION \ diff --git a/data/component-tags.json b/data/component-tags.json index b231cfafa..5201ef580 100644 --- a/data/component-tags.json +++ b/data/component-tags.json @@ -1,160 +1,299 @@ { - "description": [ - "sql", - "http", - "xml", - "web", - "security", - "database", - "json", - "yaml", - "validat", - "sanitizat", - "cloud", - "iam", - "auth", - "middleware", - "serializat", - "event", - "stream", - "rpc", - "socket", - "proto", - "resource", - "data", - "sensitive", - "template", - "log", - "logging", - "service", - "api", - "slf4j", - "parse", - "emit", - "jdbc", - "connect", - "pool", - "beans", - "transact", - "mysql", - "postgres", - "oracle", - "mongo", - "redis", - "splunk", - "stripe", - "payment", - "finance", - "currency", - "coin", - "monero", - "ssl", - "traffic", - "mvc", - "html", - "escape", - "unescape", - "rest", - "tomcat", - "jackson", - "hibernate", - "orm", - "aop", - "jwt", - "saml", - "token", - "tls", - "codec", - "cron", - "crypto", - "jce", - "certificate", - "developer", - "tools", - "autoconfigure", - "test", - "jsonpath", - "bytecode", - "mock", - "inject", - "comparators", - "transform", - "encode", - "decode", - "ldap", - "owasp", - "fileupload", - "beanshell", - "spel", - "mail", - "apacheds", - "jndi", - "ldif", - "jdbm", - "kerberos", - "oidc", - "oauth2", - "cli", - "binary", - "ml", - "ai", - "azure", - "gcp", - "terraform", - "redis", - "valkey", - "lint", - "bundle", - "object-persistence", - "text-to-image", - "translat", - "object-detect", - "mvc", - "framework", - "graph", - "templates", - "fastjson", - "simd", - "event-driven", - "productivity", - "annotations", - "typesafe", - "projections", - "performance", - "plugins", - "non-block", - "microsoft" - ], - "properties": [ - "sql", - "http", - "xml", - "security", - "cloud", - "middleware", - "framework", - "bluetooth", - "wifi", - "wireless", - "driver", - "graphics", - "firmware", - "gyroscope", - "accelerometer", - "mobile", - "network", - "battery", - "matrix", - "thunderbolt", - "crypto", - "algorithm", - "encrypt", - "decrypt", - "registry", - "maps", - "payment", - "stripe", - "apple-pay", - "icloud" - ] + "description": { + "all": [ + "sql", + "xml", + "web", + "security", + "database", + "json", + "yaml", + "validation", + "sanitization", + "cloud", + "iam", + "auth", + "middleware", + "serialization", + "event", + "stream", + "rpc", + "socket", + "proto", + "resource", + "data", + "sensitive", + "template", + "log", + "logging", + "service", + "api", + "slf4j", + "parse", + "emit", + "jdbc", + "connect", + "pool", + "beans", + "transaction", + "mysql", + "postgres", + "oracle", + "mongo", + "redis", + "splunk", + "stripe", + "payment", + "finance", + "currency", + "coin", + "monero", + "ssl", + "traffic", + "mvc", + "html", + "escape", + "unescape", + "rest", + "tomcat", + "hibernate", + "orm", + "aop", + "jwt", + "saml", + "token", + "tls", + "codec", + "cron", + "crypto", + "jce", + "certificate", + "developer", + "tools", + "autoconfigure", + "test", + "jsonpath", + "bytecode", + "mock", + "injection", + "comparators", + "transform", + "encode", + "decode", + "ldap", + "owasp", + "fileupload", + "beanshell", + "spel", + "mail", + "apacheds", + "jndi", + "ldif", + "jdbm", + "kerberos", + "oidc", + "oauth2", + "cli", + "binary", + "ml", + "ai", + "azure", + "gcp", + "terraform", + "redis", + "valkey", + "lint", + "bundle", + "object-persistence", + "text-to-image", + "translat", + "object-detect", + "mvc", + "framework", + "graph", + "templates", + "fastjson", + "simd", + "event-driven", + "productivity", + "typesafe", + "projections", + "performance", + "plugins", + "non-block", + "microsoft" + ] + }, + "properties": { + "all": [ + "sql", + "http", + "xml", + "cloud", + "middleware", + "framework", + "bluetooth", + "wifi", + "wireless", + "driver", + "graphics", + "firmware", + "gyroscope", + "accelerometer", + "mobile", + "network", + "battery", + "matrix", + "thunderbolt", + "crypto", + "algorithm", + "encrypt", + "decrypt", + "registry", + "maps", + "payment", + "stripe", + "apple-pay", + "icloud" + ], + "obom": [ + "windows_drivers", + "windows_patches", + "windows_programs", + "processor", + "services_snapshot", + "apt_sources", + "behavioral_reverse_shell", + "certificates", + "chrome_extensions", + "crontab_snapshot", + "deb_packages", + "docker_container_ports", + "docker_containers", + "docker_networks", + "docker_volumes", + "etc_hosts", + "firefox_addons", + "vscode_extensions", + "homebrew_packages", + "installed_applications", + "interface_addresses", + "kernel_info", + "kernel_integrity", + "kernel_modules", + "ld_preload", + "listening_ports", + "os_version", + "pipes", + "pipes_snapshot", + "portage_packages", + "process_events", + "processes", + "python_packages", + "rpm_packages", + "scheduled_tasks", + "services_snapshot", + "startup_items", + "system_info_snapshot", + "windows_drivers", + "windows_patches", + "windows_programs", + "windows_shared_resources", + "yum_sources", + "appcompat_shims", + "browser_plugins", + "certificates", + "chocolatey_packages", + "chrome_extensions", + "etc_hosts", + "firefox_addons", + "ie_extensions", + "kernel_info", + "npm_packages", + "opera_extensions", + "pipes_snapshot", + "process_open_sockets", + "safari_extensions", + "scheduled_tasks", + "services_snapshot", + "startup_items", + "routes", + "system_info_snapshot", + "win_version", + "windows_firewall_rules", + "windows_optional_features", + "windows_programs", + "windows_shared_resources", + "windows_update_history", + "wmi_cli_event_consumers", + "wmi_cli_event_consumers_snapshot", + "wmi_event_filters", + "wmi_filter_consumer_binding" + ] + }, + "name": { + "obom": [ + { + "devel": [ + "-(dev|devel|headers|sdk|libs|extension|headers+x86|headers+x64|headers+arm64)$", + "^(git)-", + "^(sdk|windows+sdk)" + ] + }, + { + "bin": [ + "(-bin|redistributable|clickonce|bootstrappermsi|bootstrappermsires|clickoncesigntoolmsi|codecoveragemsi|msires|sharedmsi|x64msi|arm64msi|sharedmsi|x64vmsi|filehandler_amd64|filehandler_x86|protocolhandlermsi|interopmsi|interopx64msi|shellmsires|shellx64msi)$" + ] + }, + { "kernel": ["^(linux|kernel|os-image)"] }, + { + "security": [ + "(selinux|apparmor|security|openssl|libressl|gnutls|jose|keyutils|passwd)" + ] + }, + { + "container": [ + "(container|podman|docker|runc|nerdctl|crun|libvirt|qemu)" + ] + }, + { + "build": ["(cpp|fortran|gcc|make|meson|bazel|maven|gradle|sbt|ant|gdb)"] + }, + { + "network": [ + "(tailscale|wireguard|openvpn|dns|cockpit|cups|dhcp|network|iproute|iptables|mosh|netavark|openssh|rsync|tcpdump)" + ] + }, + { "webserver": ["(httpd|http2)"] }, + { "crypto": ["(crypt|gpg|keys|certificates|gnupg|certifi)"] }, + { "repository": ["(-repos|-release|ostree|appstream)"] }, + { "shell": ["(bash|zsh|csh|fish)"] }, + { "bluetooth": ["(bluez|bluetooth)"] }, + { "sound": ["(alsa|pulseaudio|wireplumber|flac|codecs|ldac|sound)"] }, + { "compression": ["(brotli|xz-utils|zstd|lz4)", "(tar|zip|webp)$"] }, + { + "runtime": [ + "(perl|lua|php|python|ruby|dotnet|java|swift|runtime|glibc|musl|wasm|.net|asp.net|node.js|node)" + ] + }, + { "editor": ["(vim|emacs|nano|hexedit)"] }, + { "xml": ["(xml|expat)"] }, + { "boot": ["(grub|systemd-boot|syslinux)"] }, + { "gui": ["(wayland|xorg|X11|mesa|vulkan|tk|wkhtmltox|electron)"] }, + { + "package": [ + "(rpm|dnf|yum|apt|zypper|apk|conda)$", + "^(conda_package_|conda-package-)" + ] + }, + { + "browser": [ + "^(edge)", + "(firefox|chrome|opera|brave|mullvad|tor|chromium)", + "(microsoft+edge|microsoft+edge+webview2|microsoft+html)" + ] + }, + { "chat": ["(webex|teams|slack|discord|vesktop|matrix|signal|whatsapp)"] } + ] + } } diff --git a/docs/CLI.md b/docs/CLI.md index 0a89f61e8..c181887ba 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -78,6 +78,7 @@ Options: --deep Perform deep searches for components. Useful while scanning C/C++ apps, live OS and oci i mages. [boolean] --server-url Dependency track url. Eg: https://deptrack.cyclonedx.io + --skip-dt-tls-check Skip TLS certificate check when calling Dependency-Track. [boolean] [default: false] --api-key Dependency track api key --project-group Dependency track project group --project-name Dependency track project name. Default use the directory name @@ -99,7 +100,7 @@ Options: --validate Validate the generated SBOM using json schema. Defaults to true. Pass --no-validate to di sable. [boolean] [default: true] --evidence Generate SBOM with evidence for supported languages. [boolean] [default: false] - --spec-version CycloneDX Specification version to use. Defaults to 1.5 [number] [default: 1.5] + --spec-version CycloneDX Specification version to use. Defaults to 1.6 [number] [default: 1.6] --filter Filter components containing this word in purl or component.properties.value. Multiple va lues allowed. [array] --only Include components only containing this word in purl. Useful to generate BOM with first p @@ -107,11 +108,11 @@ Options: --author The person(s) who created the BOM. Set this value if you're intending the modify the BOM and claim authorship. [array] [default: "OWASP Foundation"] --profile BOM profile to use for generation. Default generic. - [choices: "appsec", "research", "operational", "threat-modeling", "license-compliance", "generic"] [default: "generic" - ] + [choices: "appsec", "research", "operational", "threat-modeling", "license-compliance", "generic", "machine-learning", + "ml", "deep-learning", "ml-deep", "ml-tiny"] [default: "generic"] --exclude Additional glob pattern(s) to ignore [array] - --include-formulation Generate formulation section with git metadata and build tools. Defaults to true. Invoke - with --no-include-formulation to disable. [boolean] [default: true] + --include-formulation Generate formulation section with git metadata and build tools. Defaults to false. + [boolean] [default: false] --include-crypto Include crypto libraries as components. [boolean] [default: false] --standard The list of standards which may consist of regulations, industry or organizational-specif ic standards, maturity models, best practices, or any other requirements which can be eva @@ -124,9 +125,11 @@ Options: -v, --version Show version number [boolean] Examples: - cdxgen -t java . Generate a Java SBOM for the current directory - cdxgen -t java -t js . Generate a SBOM for Java and JavaScript in the current directory - cdxgen --server Run cdxgen as a server + cdxgen -t java . Generate a Java SBOM for the current directory + cdxgen -t java -t js . Generate a SBOM for Java and JavaScript in the current directory + cdxgen -t java --profile ml . Generate a Java SBOM for machine learning purposes. + cdxgen -t python --profile research . Generate a Python SBOM for appsec research. + cdxgen --server Run cdxgen as a server for documentation, visit https://cyclonedx.github.io/cdxgen ``` diff --git a/lib/evinser/evinser.js b/lib/evinser/evinser.js index d24e93ea2..1b153940d 100644 --- a/lib/evinser/evinser.js +++ b/lib/evinser/evinser.js @@ -17,6 +17,7 @@ import { getMavenCommand, getTimestamp, } from "../helpers/utils.js"; +import { postProcess } from "../stages/postgen/postgen.js"; import { createSemanticsSlices } from "./swiftsem.js"; const DB_NAME = "evinser.db"; @@ -1179,8 +1180,8 @@ export const createEvinseFile = (sliceArtefacts, options) => { const evinseOutFile = options.output; const bomJson = JSON.parse(fs.readFileSync(bomFile, "utf8")); const components = bomJson.components || []; - // Clear the existing annotations - bomJson.annotations = undefined; + // Clear existing annotations + bomJson.annotations = []; let occEvidencePresent = false; let csEvidencePresent = false; let servicesPresent = false; @@ -1263,9 +1264,6 @@ export const createEvinseFile = (sliceArtefacts, options) => { bomJson.dependencies = newDependencies; } if (options.annotate) { - if (!bomJson.annotations) { - bomJson.annotations = []; - } if (usagesSlicesFile && fs.existsSync(usagesSlicesFile)) { bomJson.annotations.push({ subjects: [bomJson.serialNumber], @@ -1296,7 +1294,11 @@ export const createEvinseFile = (sliceArtefacts, options) => { // Set the current timestamp to indicate this is newer bomJson.metadata.timestamp = getTimestamp(); delete bomJson.signature; - fs.writeFileSync(evinseOutFile, JSON.stringify(bomJson, null, null)); + const bomNSData = postProcess({ bomJson }, options); + fs.writeFileSync( + evinseOutFile, + JSON.stringify(bomNSData.bomJson, null, null), + ); if (occEvidencePresent || csEvidencePresent || servicesPresent) { console.log(evinseOutFile, "created successfully."); } else { @@ -1307,7 +1309,8 @@ export const createEvinseFile = (sliceArtefacts, options) => { if (tempDir?.startsWith(tmpdir())) { fs.rmSync(tempDir, { recursive: true, force: true }); } - return bomJson; + // Redo post processing with evinse data + return bomNSData?.bomJson; }; /** diff --git a/lib/helpers/display.js b/lib/helpers/display.js index e4aca253f..86f054c46 100644 --- a/lib/helpers/display.js +++ b/lib/helpers/display.js @@ -102,16 +102,17 @@ export function printOSTable(bomJson) { columnDefault: { width: 50, }, - columnCount: 3, - columns: [{ width: 20 }, { width: 40 }, { width: 50 }], + columnCount: 4, + columns: [{ width: 20 }, { width: 40 }, { width: 50 }, { width: 25 }], }; const stream = createStream(config); - stream.write(["Type", "Title", "Properties"]); + stream.write(["Type", "Title", "Properties", "Tags"]); for (const comp of bomJson.components) { stream.write([ comp.type, `\x1b[1;35m${comp.name.replace(/\+/g, " ").replace(/--/g, "::")}\x1b[0m`, formatProps(comp.properties || []), + (comp.tags || []).join(", "), ]); } console.log(); diff --git a/lib/stages/postgen/annotator.js b/lib/stages/postgen/annotator.js index 68c6603fd..66e414eb5 100644 --- a/lib/stages/postgen/annotator.js +++ b/lib/stages/postgen/annotator.js @@ -1,8 +1,8 @@ import { readFileSync } from "node:fs"; -import os from "node:os"; import { join } from "node:path"; import { dirNameStr } from "../../helpers/utils.js"; +// Tags per BOM type. const componentTags = JSON.parse( readFileSync(join(dirNameStr, "data", "component-tags.json"), "utf-8"), ); @@ -41,7 +41,44 @@ function cleanTypes(s) { } /** - * Create the textual representation of the metadata section + * Method to determine the type of the BOM. + * + * @param {Object} bomJson BOM JSON Object + * + * @returns {String} Type of the bom such as sbom, cbom, obom, ml-bom etc + */ +export function findBomType(bomJson) { + let description = "Software Bill-of-Materials (SBOM)"; + let bomType = "SBOM"; + const metadata = bomJson.metadata; + const lifecycles = metadata?.lifecycles || []; + const cryptoAssetsCount = bomJson?.components?.filter( + (c) => c.type === "cryptographic-asset", + ).length; + const dataCount = bomJson?.components?.filter( + (c) => + c?.data?.length > 0 || + (c.modelCard && Object.keys(c?.modelCard).length > 0), + ).length; + // Is this an OBOM? + if (lifecycles.filter((l) => l.phase === "operations").length > 0) { + bomType = "OBOM"; + description = "Operations Bill-of-Materials (OBOM)"; + } else if (cryptoAssetsCount > 0) { + bomType = "CBOM"; + description = "Cryptography Bill-of-Materials (CBOM)"; + } else if (dataCount > 0) { + bomType = "ML-BOM"; + description = "Machine-Learning Bill-of-Materials (ML-BOM)"; + } + return { + bomType, + bomTypeDescription: description, + }; +} + +/** + * Create the textual representation of the metadata section. * * @param {Object} bomJson BOM JSON Object * @@ -52,20 +89,20 @@ export function textualMetadata(bomJson) { return undefined; } let text = ""; - let cdxTypeDesc = "Software Bill-of-Materials (SBOM)"; + const { bomType, bomTypeDescription } = findBomType(bomJson); const metadata = bomJson.metadata; const lifecycles = metadata?.lifecycles || []; - const cryptoAssetsCount = bomJson?.components.filter( + const cryptoAssetsCount = bomJson?.components?.filter( (c) => c.type === "cryptographic-asset", ).length; - // Is this an OBOM? - if (lifecycles.filter((l) => l.phase === "operations").length > 0) { - cdxTypeDesc = "Operations Bill-of-Materials (OBOM)"; - } else if (cryptoAssetsCount > 0) { - cdxTypeDesc = "Cryptography Bill-of-Materials (CBOM)"; - } + const vsixCount = bomJson?.components?.filter((c) => + c?.purl?.startsWith("pkg:vsix"), + ).length; + const swidCount = bomJson?.components?.filter((c) => + c?.purl?.startsWith("pkg:swid"), + ).length; if (metadata?.timestamp) { - text = `This ${cdxTypeDesc} document was created on ${humanifyTimestamp(metadata.timestamp)}`; + text = `This ${bomTypeDescription} document was created on ${humanifyTimestamp(metadata.timestamp)}`; } if (metadata?.tools) { const tools = metadata.tools.components; @@ -97,12 +134,16 @@ export function textualMetadata(bomJson) { parentVersion && !["", "unspecified", "latest", "master", "main"].includes(parentVersion) ) { - text = `${text} The document describes ${toArticle(metadata.component.type)} ${cleanTypeName} named '${cleanNames(metadata.component.name)}' with version '${parentVersion}'.`; + let versionType = "version"; + if (parentVersion.includes(" ") || parentVersion.includes("(")) { + versionType = "the build name"; + } + text = `${text} The document describes ${toArticle(metadata.component.type)} ${cleanTypeName} named '${cleanNames(metadata.component.name)}' with ${versionType} '${parentVersion}'.`; } else { text = `${text} The document describes ${toArticle(metadata.component.type)} ${cleanTypeName} named '${cleanNames(metadata.component.name)}'.`; } if (cryptoAssetsCount) { - text = `${text} There are ${cryptoAssetsCount} cryptographic assets listed under components in this CBOM.`; + text = `${text} There are ${cryptoAssetsCount} cryptographic assets listed under components in this ${bomType}.`; } if ( metadata?.component.components && @@ -166,11 +207,19 @@ export function textualMetadata(bomJson) { } if (bomPkgTypes.length && bomPkgNamespaces.length) { if (bomPkgTypes.length === 1) { - text = `${text} The package type in this xBOM is ${joinArray(bomPkgTypes)} with ${bomPkgNamespaces.length} namespaces described under components.`; + text = `${text} The package type in this ${bomType} is ${joinArray(bomPkgTypes)} with ${bomPkgNamespaces.length} namespaces described under components.`; } else { text = `${text} ${bomPkgTypes.length} package type(s) and ${bomPkgNamespaces.length} namespaces are described in the document under components.`; } } + if (bomType === "OBOM") { + if (vsixCount > 0) { + text = `${text} The system appears to be set up for remote development, with ${vsixCount} Visual Studio Code extensions installed.`; + } + if (swidCount > 0) { + text = `${text} In addition, there are ${swidCount} applications installed on the system.`; + } + } return text; } @@ -178,38 +227,70 @@ export function textualMetadata(bomJson) { * Extract interesting tags from the component attribute * * @param {Object} component CycloneDX component + * @param {String} bomType BOM type * @returns {Array | undefined} Array of string tags */ -export function extractTags(component) { - if (!component || (!component.description && !component.properties)) { +export function extractTags(component, bomType = "all") { + if ( + !component || + (!component.description && !component.properties && !component.name) + ) { return undefined; } const tags = new Set(); - const desc = component?.description - ?.toLowerCase() - .replaceAll("ion ", " ") - .replaceAll("ing ", " ") - .replaceAll("ed ", " "); + const desc = component?.description?.toLowerCase(); const compProps = component.properties || []; - // Identify tags from description - for (const adescTag of componentTags.description) { - if ( - desc && - (desc.includes(` ${adescTag} `) || desc.includes(` ${adescTag}.`)) - ) { - tags.add(adescTag); + // Collect both the BOM specific tags and all tags + const compNameTags = (componentTags.name[bomType.toLowerCase()] || []).concat( + componentTags.name.all || [], + ); + const compDescTags = ( + componentTags.description[bomType.toLowerCase()] || [] + ).concat(componentTags.description.all || []); + const compPropsTags = ( + componentTags.properties[bomType.toLowerCase()] || [] + ).concat(componentTags.properties.all || []); + if (component?.name) { + // {"devel": ["/-(dev|devel|headers)$/"]} + for (const anameTagObject of compNameTags) { + for (const compCategoryTag of Object.keys(anameTagObject)) { + for (const catRegexStr of anameTagObject[compCategoryTag]) { + // Regex-based search on the name + if (new RegExp(catRegexStr, "ig").test(component.name)) { + tags.add(compCategoryTag); + } + } + } } } - // Identify tags from properties - for (const adescTag of componentTags.properties) { - for (const aprop of compProps) { + // Identify tags from description + if (desc) { + for (const adescTag of compDescTags) { + if (desc.includes(` ${adescTag} `) || desc.includes(` ${adescTag}.`)) { + tags.add(adescTag); + } + const stemmedTag = adescTag.replace(/(ion|ed|er|en|ing)$/, ""); + const stemmedDesc = adescTag.replace(/(ion|ed|er|en|ing) $/, " "); if ( - aprop.name !== "SrcFile" && - aprop?.value?.toLowerCase().includes(adescTag) + stemmedDesc.includes(` ${stemmedTag} `) || + stemmedDesc.includes(` ${stemmedTag}.`) ) { tags.add(adescTag); } } } + // Identify tags from properties as a fallback + if (!tags.size) { + for (const adescTag of compPropsTags) { + for (const aprop of compProps) { + if ( + aprop.name !== "SrcFile" && + aprop?.value?.toLowerCase().includes(adescTag) + ) { + tags.add(adescTag); + } + } + } + } return Array.from(tags).sort(); } diff --git a/lib/stages/postgen/annotator.test.js b/lib/stages/postgen/annotator.test.js index 1a8921e08..61daa2821 100644 --- a/lib/stages/postgen/annotator.test.js +++ b/lib/stages/postgen/annotator.test.js @@ -1,4 +1,4 @@ -import { textualMetadata } from "./annotator.js"; +import { extractTags, textualMetadata } from "./annotator.js"; import { expect, test } from "@jest/globals"; @@ -267,3 +267,10 @@ test("textualMetadata tests", () => { "This Operations Bill-of-Materials (OBOM) document was created on Monday, November 11, 2024 with cdxgen. The lifecycles phases represented are: pre-build and operations. The document describes an operating system named 'Microsoft Windows 11 Pro' with version '22H2'. The OS is x64 architecture with the build version '10.0.22621'.", ); }); + +test("extractTags tests", () => { + expect(extractTags({ name: "container-selinux" }, "obom")).toEqual([ + "container", + "security", + ]); +}); diff --git a/lib/stages/postgen/postgen.js b/lib/stages/postgen/postgen.js index b61e7dad7..438f20c95 100644 --- a/lib/stages/postgen/postgen.js +++ b/lib/stages/postgen/postgen.js @@ -9,7 +9,7 @@ import { getTimestamp, hasAnyProjectType, } from "../../helpers/utils.js"; -import { extractTags, textualMetadata } from "./annotator.js"; +import { extractTags, findBomType, textualMetadata } from "./annotator.js"; /** * Filter and enhance BOM post generation. @@ -299,18 +299,20 @@ export function annotate(bomJson, options) { if (!bomJson?.components) { return bomJson; } - const bomAnnotations = bomJson.annotations || []; + const bomAnnotations = bomJson?.annotations || []; const cdxgenAnnotator = bomJson.metadata.tools.components.filter( (c) => c.name === "cdxgen", ); if (!cdxgenAnnotator.length) { return bomJson; } + const requiresContextTrimming = ["ml-tiny"].includes(options?.profile); const requiresContextTuning = [ "deep-learning", "machine-learning", "ml", "ml-deep", + "ml-tiny", ].includes(options?.profile); // Construct the bom-link prefix to use for context tuning const bomLinkPrefix = `${bomJson.serialNumber}/${bomJson.version}/`; @@ -333,9 +335,19 @@ export function annotate(bomJson, options) { }); } bomJson.annotations = bomAnnotations; + // Shall we trim the metadata section + if (requiresContextTrimming) { + if (bomJson?.metadata?.component?.components) { + bomJson.metadata.component.components = undefined; + } + if (bomJson?.metadata?.component?.["bom-ref"]) { + bomJson.metadata.component["bom-ref"] = undefined; + } + } + const { bomType, bomTypeDescription } = findBomType(bomJson); // Tag the components for (const comp of bomJson.components) { - const tags = extractTags(comp); + const tags = extractTags(comp, bomType); if (tags?.length) { comp.tags = tags; } @@ -344,7 +356,22 @@ export function annotate(bomJson, options) { `${bomLinkPrefix}${stripBomLink(bomJson.serialNumber, bomJson.version, comp["bom-ref"])}`; comp.description = undefined; comp.properties = undefined; + comp.evidence = undefined; } + if (requiresContextTrimming) { + comp.authors = undefined; + comp.supplier = undefined; + comp.publisher = undefined; + comp["bom-ref"] = undefined; + comp.externalReferences = undefined; + comp.description = undefined; + comp.properties = undefined; + comp.evidence = undefined; + } + } + // For tiny models, we can remove the dependencies section + if (requiresContextTrimming) { + bomJson.dependencies = undefined; } // Problem: information such as the dependency tree are specific to an sbom // To prevent the models from incorrectly learning about the trees, we automatically convert all bom-ref diff --git a/types/lib/evinser/evinser.d.ts.map b/types/lib/evinser/evinser.d.ts.map index 7cb068587..f98e36468 100644 --- a/types/lib/evinser/evinser.d.ts.map +++ b/types/lib/evinser/evinser.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"evinser.d.ts","sourceRoot":"","sources":["../../../lib/evinser/evinser.js"],"names":[],"mappings":"AA+vBA;;;;;;GAMG;AACH,iFAyFC;AAl0BM;;;;;;;;;;;;;qBAojD0h5C,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAA81kB,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAA81kB,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAn/Cls0B;AAEM,6GAiDN;AAEM,gGAkCN;AAEM,wGAqBN;AAEM;;;;;;;;EAqFN;AAEM,iEAoBN;AAEM;;;EA8BN;AAQM;;;;;;;;;;;;;GAsJN;AAEM,2JA4CN;AAcM,2CARI,MAAM,mHAkNhB;AA2HM,sGAqEN;AASM,mDAJI,MAAM,0CA6DhB;AASM,gDAJI,MAAM,mDA8DhB;AAEM,yEAWN;AAEM,gEAmDN;AASM,yEAiJN;AAaM,gDAPI,MAAM,wHAyHhB;AAUM,kDAHI,MAAM;;;;;;;;;;;;;EA2FhB;AAQM,kDAaN;AAQM,2CAHI,MAAM,UAKhB;AAEM,oFAyCN"} \ No newline at end of file +{"version":3,"file":"evinser.d.ts","sourceRoot":"","sources":["../../../lib/evinser/evinser.js"],"names":[],"mappings":"AAgwBA;;;;;;GAMG;AACH,iFAyFC;AAl0BM;;;;;;;;;;;;;qBAsjD264C,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAA81kB,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAA81kB,CAAC;qBAAgB,CAAC;;;qBAA4F,CAAC;qBAAgB,CAAC;;;qBAAkE,CAAC;qBAAgB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wGAAr3wC,QAAa;;;;;;;;;;;;;;sHAAq3M,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;wJAAmkY,CAAC;;;wJAA2rB,CAAC;qUAAg+C,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;qUAA8wB,CAAC;2JAAqH,CAAC,kJAAgH,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAr/Cnl0B;AAEM,6GAiDN;AAEM,gGAkCN;AAEM,wGAqBN;AAEM;;;;;;;;EAqFN;AAEM,iEAoBN;AAEM;;;EA8BN;AAQM;;;;;;;;;;;;;GAsJN;AAEM,2JA4CN;AAcM,2CARI,MAAM,mHAkNhB;AA2HM,sGAqEN;AASM,mDAJI,MAAM,0CA6DhB;AASM,gDAJI,MAAM,mDA8DhB;AAEM,yEAWN;AAEM,gEAmDN;AASM,yEAmJN;AAaM,gDAPI,MAAM,wHAyHhB;AAUM,kDAHI,MAAM;;;;;;;;;;;;;EA2FhB;AAQM,kDAaN;AAQM,2CAHI,MAAM,UAKhB;AAEM,oFAyCN"} \ No newline at end of file diff --git a/types/lib/helpers/display.d.ts.map b/types/lib/helpers/display.d.ts.map index a3d771801..81ff9d40c 100644 --- a/types/lib/helpers/display.d.ts.map +++ b/types/lib/helpers/display.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"display.d.ts","sourceRoot":"","sources":["../../../lib/helpers/display.js"],"names":[],"mappings":"AAoBA,mFAuEC;AAQD,iDAkBC;AACD,kDAsBC;AAED,qDAqBC;AAeD,qDA4BC;AACD,mDA8CC;AACD,wFAuCC;AA4DD,2DA+BC;AAED,iEA0BC;AAED,uDAoBC;AAED,iDA8CC"} \ No newline at end of file +{"version":3,"file":"display.d.ts","sourceRoot":"","sources":["../../../lib/helpers/display.js"],"names":[],"mappings":"AAoBA,mFAuEC;AAQD,iDAmBC;AACD,kDAsBC;AAED,qDAqBC;AAeD,qDA4BC;AACD,mDA8CC;AACD,wFAuCC;AA4DD,2DA+BC;AAED,iEA0BC;AAED,uDAoBC;AAED,iDA8CC"} \ No newline at end of file diff --git a/types/lib/stages/postgen/annotator.d.ts b/types/lib/stages/postgen/annotator.d.ts index f26c8cbad..0d5f72ac9 100644 --- a/types/lib/stages/postgen/annotator.d.ts +++ b/types/lib/stages/postgen/annotator.d.ts @@ -1,5 +1,13 @@ /** - * Create the textual representation of the metadata section + * Method to determine the type of the BOM. + * + * @param {Object} bomJson BOM JSON Object + * + * @returns {String} Type of the bom such as sbom, cbom, obom, ml-bom etc + */ +export function findBomType(bomJson: any): string; +/** + * Create the textual representation of the metadata section. * * @param {Object} bomJson BOM JSON Object * @@ -10,7 +18,8 @@ export function textualMetadata(bomJson: any): string | undefined; * Extract interesting tags from the component attribute * * @param {Object} component CycloneDX component + * @param {String} bomType BOM type * @returns {Array | undefined} Array of string tags */ -export function extractTags(component: any): any[] | undefined; +export function extractTags(component: any, bomType?: string): any[] | undefined; //# sourceMappingURL=annotator.d.ts.map \ No newline at end of file diff --git a/types/lib/stages/postgen/annotator.d.ts.map b/types/lib/stages/postgen/annotator.d.ts.map index 03b8865aa..940562358 100644 --- a/types/lib/stages/postgen/annotator.d.ts.map +++ b/types/lib/stages/postgen/annotator.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"annotator.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/annotator.js"],"names":[],"mappings":"AA0CA;;;;;;GAMG;AACH,+CAFa,SAAS,SAAS,CA+H9B;AAED;;;;;GAKG;AACH,6CAFa,QAAQ,SAAS,CAkC7B"} \ No newline at end of file +{"version":3,"file":"annotator.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/annotator.js"],"names":[],"mappings":"AA0CA;;;;;;GAMG;AACH,kDA4BC;AAED;;;;;;GAMG;AACH,+CAFa,SAAS,SAAS,CA2I9B;AAED;;;;;;GAMG;AACH,+DAFa,QAAQ,SAAS,CAiE7B"} \ No newline at end of file diff --git a/types/lib/stages/postgen/postgen.d.ts.map b/types/lib/stages/postgen/postgen.d.ts.map index 4e38a742c..455ff16a4 100644 --- a/types/lib/stages/postgen/postgen.d.ts.map +++ b/types/lib/stages/postgen/postgen.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AAED;;;;;;;GAOG;AACH,2DAyIC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAyEC"} \ No newline at end of file +{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AAED;;;;;;;GAOG;AACH,2DAyIC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAoGC"} \ No newline at end of file