From 5fd0d16da0e03e9628fceeaa6bce4a827c26d6c6 Mon Sep 17 00:00:00 2001 From: anthonyharrison Date: Mon, 6 Mar 2023 17:27:52 +0000 Subject: [PATCH] feat: Refactor to use lib4sbom --- requirements.txt | 1 + sbom4python/cli.py | 76 ++++----- sbom4python/cyclonedxgenerator.py | 217 ------------------------ sbom4python/dotgenerator.py | 77 --------- sbom4python/generator.py | 117 ------------- sbom4python/output.py | 71 -------- sbom4python/scanner.py | 168 +++++++++++++++---- sbom4python/spdxgenerator.py | 266 ------------------------------ sbom4python/version.py | 2 +- 9 files changed, 167 insertions(+), 828 deletions(-) delete mode 100644 sbom4python/cyclonedxgenerator.py delete mode 100644 sbom4python/dotgenerator.py delete mode 100644 sbom4python/generator.py delete mode 100644 sbom4python/output.py delete mode 100644 sbom4python/spdxgenerator.py diff --git a/requirements.txt b/requirements.txt index ac47736..ec4ae2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ lib4sbom sbom4files +sbom2dot diff --git a/sbom4python/cli.py b/sbom4python/cli.py index e047cd9..5eee4fc 100644 --- a/sbom4python/cli.py +++ b/sbom4python/cli.py @@ -6,14 +6,17 @@ import textwrap from collections import ChainMap -from sbom4python.dotgenerator import DOTGenerator -from sbom4python.generator import SBOMGenerator -from sbom4python.output import SBOMOutput +# from sbom4python.output import SBOMOutput from sbom4python.scanner import SBOMScanner from sbom4python.version import VERSION -# CLI processing +from sbom2dot.dotgenerator import DOTGenerator + +from lib4sbom.generator import SBOMGenerator +from lib4sbom.output import SBOMOutput +from lib4sbom.sbom import SBOM +# CLI processing def main(argv=None): @@ -43,6 +46,12 @@ def main(argv=None): action="store_true", help="suppress detecting the license of components", ) + input_group.add_argument( + "--include-file", + action="store_true", + default=False, + help="include reporting files associated with module", + ) output_group = parser.add_argument_group("Output") output_group.add_argument( @@ -63,7 +72,7 @@ def main(argv=None): "--format", action="store", default="tag", - choices=["tag", "json", "xml"], + choices=["tag", "json", "yaml"], help="specify format of software bill of materials (sbom) (default: tag)", ) @@ -87,6 +96,7 @@ def main(argv=None): defaults = { "module": "", + "include_file": False, "exclude_license": False, "output_file": "", "sbom": "spdx", @@ -105,63 +115,43 @@ def main(argv=None): # Ensure format is aligned with type of SBOM bom_format = args["format"] - if args["sbom"] == "spdx": - # XML not valid for SPDX - if bom_format == "xml": - bom_format = "tag" - else: - # Tag not valid for CycloneDX - if bom_format == "tag": + if args["sbom"] == "cyclonedx": + # Only JSON format valid for CycloneDX + if bom_format != "json": bom_format = "json" if args["debug"]: print("Exclude Licences:", args["exclude_license"]) + print("Include Files:", args["include_file"]) print("SBOM type:", args["sbom"]) print("Format:", bom_format) print("Output file:", args["output_file"]) print("Graph file:", args["graph"]) print(f"Analysing {module_name}") - sbom_scan = SBOMScanner(args["debug"]) - sbom_scan.set_module(module_name) - sbom_scan.process_module() - - # If module not found, abort processing - if not sbom_scan.valid_module(): - return -1 - - sbom_scan.add( - [ - "-", - sbom_scan.get("Name").lower().replace("_", "-"), - sbom_scan.get("Version"), - sbom_scan.get("Author") + " " + sbom_scan.get("Author-email"), - sbom_scan.get("License"), - ] - ) - sbom_scan.analyze(sbom_scan.get("Name"), sbom_scan.get("Requires")) + sbom_scan = SBOMScanner(args["debug"], args["include_file"], args["exclude_license"]) + sbom_scan.process_python_module(module_name) # Generate SBOM file - sbom_gen = SBOMGenerator( - args["exclude_license"], args["sbom"], bom_format, app_name, VERSION, "pypi" + python_sbom = SBOM() + python_sbom.add_files(sbom_scan.get_files()) + python_sbom.add_packages(sbom_scan.get_packages()) + python_sbom.add_relationships(sbom_scan.get_relationships()) + + sbom_gen = SBOMGenerator(sbom_type=args["sbom"], format=bom_format, application = app_name, version = VERSION) + sbom_gen.generate( + project_name=sbom_scan.get_parent(), + sbom_data=python_sbom.get_sbom(), + filename=args["output_file"], ) - sbom_out = SBOMOutput(args["output_file"], bom_format) - - if args["sbom"] == "spdx": - sbom_gen.generate_spdx(module_name, sbom_scan.get_record()) - sbom_out.generate_output(sbom_gen.get_spdx()) - else: - sbom_gen.generate_cyclonedx(module_name, sbom_scan.get_record()) - sbom_out.generate_output(sbom_gen.get_cyclonedx()) if len(args["graph"]) > 0: - sbom_dot = DOTGenerator() - sbom_dot.generatedot(sbom_gen.get_relationships()) + sbom_dot = DOTGenerator(python_sbom.get_sbom()["packages"]) + sbom_dot.generatedot(python_sbom.get_sbom()["relationships"]) dot_out = SBOMOutput(args["graph"], "dot") dot_out.generate_output(sbom_dot.getDOT()) return 0 - if __name__ == "__main__": sys.exit(main()) diff --git a/sbom4python/cyclonedxgenerator.py b/sbom4python/cyclonedxgenerator.py deleted file mode 100644 index 4710d46..0000000 --- a/sbom4python/cyclonedxgenerator.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (C) 2023 Anthony Harrison -# SPDX-License-Identifier: Apache-2.0 - -import re -import uuid -from datetime import datetime - -from sbom4python.license import LicenseScanner - - -class CycloneDXGenerator: - """ - Generate CycloneDX SBOM. - """ - - CYCLONEDX_VERSION = "1.4" - DATA_LICENCE = "CC0-1.0" - SPDX_NAMESPACE = "http://spdx.org/spdxdocs/" - SPDX_PROJECT_ID = "SPDXRef-DOCUMENT" - NAME = "SBOM4PYTHON_Generator" - PACKAGE_PREAMBLE = "SPDXRef-Package-" - LICENSE_PREAMBLE = "LicenseRef-" - - def __init__( - self, - include_license: False, - cyclonedx_format="json", - application="sbom4python", - version="0.1", - ): - self.doc = [] - self.package_id = 0 - self.include_license = include_license - self.license = LicenseScanner() - self.format = cyclonedx_format - self.application = application - self.application_version = version - if self.format == "xml": - self.doc = [] - else: - self.doc = {} - self.component = [] - self.relationship = [] - self.sbom_complete = False - self.include_purl = False - - def set_purl(self, package_manager): - self.include_purl = True - self.package_manager = package_manager - - def store(self, message): - self.doc.append(message) - - def getBOM(self): - if not self.sbom_complete: - if self.format == "xml": - self.store("") - # Now process dependencies - self.store("") - for element in self.relationship: - item = element["ref"] - self.store(f'') - for depends in element["dependsOn"]: - self.store(f'') - self.store("") - self.store("") - self.store("") - else: - # Add set of detected components to SBOM - self.doc["components"] = self.component - self.doc["dependencies"] = self.relationship - self.sbom_complete = True - return self.doc - - def getRelationships(self): - # Only required for relationships graph. Reformat data - relationship_graph = [] - for relationship in self.relationship: - from_id = relationship["ref"] - if len(relationship_graph) == 0: - # Add root element - relationship_graph.append([from_id, from_id, " DESCRIBES "]) - for depend in relationship["dependsOn"]: - relationship_graph.append([from_id, depend, " CONTAINS "]) - return relationship_graph - - def generateTime(self): - # Generate data/time label in format YYYY-MM-DDThh:mm:ssZ - return datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") - - def generateDocumentHeader(self, project_name): - if self.format == "xml": - self.generateXMLDocumentHeader(project_name) - else: - self.generateJSONDocumentHeader(project_name) - - def generateJSONDocumentHeader(self, project_name): - urn = "urn:uuid" + str(uuid.uuid4()) - self.doc = { - "$schema": "http://cyclonedx.org/schema/bom-1.4.schema.json", - "bomFormat": "CycloneDX", - "specVersion": self.CYCLONEDX_VERSION, - "serialNumber": urn, - "version": 1, - "metadata": { - "timestamp": self.generateTime(), - "tools": [ - { - "name": self.application, - "version": self.application_version, - } - ], - }, - } - - def generateXMLDocumentHeader(self, project_name): - urn = "urn:uuid" + str(uuid.uuid4()) - self.store("") - self.store("') - self.store("") - self.store(f"{self.generateTime()}") - self.store("") - self.store(f"{self.application}") - self.store(f"{self.application_version}") - self.store("") - self.store("") - self.store("") - - def generateRelationship(self, parent_id, package_id): - # Check if entry exists. If so, update list of dependencies - element_found = False - for element in self.relationship: - if element["ref"] == parent_id: - # Update list of dependencies - element["dependsOn"].append(package_id) - element_found = True - break - if not element_found: - # New item found - dependency = dict() - dependency["ref"] = parent_id - dependency["dependsOn"] = [package_id] - self.relationship.append(dependency) - - def generateComponent(self, id, type, name, supplier, version, licence): - if self.format == "xml": - self.generateXMLComponent(id, type, name, supplier, version, licence) - else: - self.generateJSONComponent(id, type, name, supplier, version, licence) - - def _format_supplier(self, supplier_info): - # Get names, ignore email addresses - names = re.findall(r"[a-zA-Z\.\]+ [A-Za-z]+ ", supplier_info) - supplier = " ".join(n for n in names) - return re.sub(" +", " ", supplier.strip()) - - def generateJSONComponent( - self, id, type, name, supplier, version, identified_licence - ): - component = dict() - component["type"] = type - component["bom-ref"] = id - component["name"] = name - component["version"] = version - if supplier != "UNKNOWN" and len(supplier) > 1: - component_supplier = self._format_supplier(supplier) - component["author"] = component_supplier - # Supplier name mustn't have spaces in. Covert spaces to '_' - component[ - "cpe" - ] = f"cpe:/a:{component_supplier.replace(' ', '_').lower()}:{name}:{version}" - if identified_licence != "": - license_id = self.license.find_license(identified_licence) - # Only include if valid license - if license_id != "UNKNOWN": - license = dict() - license["id"] = license_id - license_url = self.license.get_license_url(license["id"]) - if license_url is not None: - license["url"] = license_url - item = dict() - item["license"] = license - component["licenses"] = [item] - if self.include_purl: - component["purl"] = f"pkg:{self.package_manager}/{name}@{version}" - self.component.append(component) - - def generateXMLComponent( - self, id, type, name, supplier, version, identified_licence - ): - self.store(f'') - self.store(f"{name}") - self.store(f"{version}") - if supplier != "UNKNOWN" and len(supplier) > 1: - component_supplier = self._format_supplier(supplier) - self.store(f"{component_supplier}") - # Supplier name mustn't have spaces in. Covert spaces to '_' - self.store( - f"cpe:/a:{component_supplier.replace(' ', '_').lower()}:{name}:{version}" - ) - if identified_licence != "": - license_id = self.license.find_license(identified_licence) - # Only include if valid license - if license_id != "UNKNOWN": - self.store("") - self.store("") - self.store(f'"{license_id}"') - license_url = self.license.get_license_url(license_id) - if license_url is not None: - self.store(f'"{license_url}"') - self.store("") - self.store("") - if self.include_purl: - self.store(f"pkg:{self.package_manager}/{name}@{version}") - self.store("") diff --git a/sbom4python/dotgenerator.py b/sbom4python/dotgenerator.py deleted file mode 100644 index 5204c1c..0000000 --- a/sbom4python/dotgenerator.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (C) 2022 Anthony Harrison -# SPDX-License-Identifier: Apache-2.0 - - -class DOTGenerator: - def __init__(self): - self.dot = [] - - def getDOT(self): - return self.dot - - def show(self, text): - # print (text) - self.dot.append(text) - - def get_package(self, package_id): - # Extract package name from package identifier. - # Supported identifier format is SPDXRef-Package-n- or n- - prefix = "SPDXRef-Package-" - if prefix in package_id: - # Format is SPDXRef-Package-n- - # Find package name after package number n - startpos = len(prefix) + 1 - return package_id[package_id[startpos:].find("-") + startpos + 1 :] - elif "-" in package_id: - # Format is n- - return package_id[package_id.find("-") + 1 :] - return package_id - - def set_colour(self, colour): - base = " [shape=box, style=filled, fontcolor=white, fillcolor=" - return base + colour + "];" - - def generatedot(self, data): - - # Generate header - self.show("strict digraph sbom {") - self.show('\tsize="8,10.5"; ratio=fill;') - # Generate graph - root = "" - explicit_style = self.set_colour("royalblue") - implicit_style = self.set_colour("darkgreen") - packages = [] - for element in data: - source = element[0] - dest = element[1] - relationship = element[2] - - lib = '"' + self.get_package(source) + '"' - application = '"' + self.get_package(dest) + '"' - - if relationship == " DESCRIBES ": - # Should only be one DESCRIBES relationship. - root = application - else: - if lib == root: - if lib not in packages: - packages.append(lib) - self.show("\t" + lib + self.set_colour("darkred")) - if application not in packages: - packages.append(application) - self.show("\t" + application + explicit_style) - elif application == root: - if lib not in packages: - packages.append(lib) - self.show("\t" + lib + explicit_style) - else: - if lib not in packages: - packages.append(lib) - self.show("\t" + lib + implicit_style) - if application not in packages: - packages.append(application) - self.show("\t" + application + implicit_style) - if lib != application: - self.show("\t" + lib + " -> " + application + ";") - self.show("}") - # end diff --git a/sbom4python/generator.py b/sbom4python/generator.py deleted file mode 100644 index 198a765..0000000 --- a/sbom4python/generator.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (C) 2022 Anthony Harrison -# SPDX-License-Identifier: Apache-2.0 - -from sbom4python.cyclonedxgenerator import CycloneDXGenerator -from sbom4python.spdxgenerator import SPDXGenerator - - -class SBOMGenerator: - """ - Simple SBOM Generator. - """ - - def __init__( - self, - include_license: False, - sbom_type="spdx", - format="tag", - application="sbom4python", - version="0.1", - package_manager="pypi", - ): - if sbom_type == "spdx": - self.bom = SPDXGenerator(include_license, format, application, version) - self.bom.set_purl(package_manager) - else: - self.bom = CycloneDXGenerator(include_license, format, application, version) - self.bom.set_purl(package_manager) - - def generate_spdx(self, project_name, packages): - self.sbom_complete = False - project_id = self.bom.generateDocumentHeader(project_name) - # Process list of packages - id = 1 - package_set = {} - for package in packages: - product = package[1] - version = package[2] - supplier = package[3] - licence = package[4] - parent = package[0].lower() - if product not in package_set: - package_set[product] = str(id) + "-" + product - if parent == "-": - parent_id = project_id - relationship = " DESCRIBES " - else: - if parent in package_set: - parent_id = package_set[parent] - relationship = " DEPENDS_ON " - self.bom.generatePackageDetails( - product, - str(id) + "-" + product, - version, - supplier, - licence, - parent_id, - relationship, - ) - id = id + 1 - else: - if parent == "-": - parent_id = project_id - relationship = " DESCRIBES " - elif parent in package_set: - relationship = " DEPENDS_ON " - parent_id = package_set[parent] - else: - parent_id = None - if parent_id is not None: - self.bom.generateRelationship( - self.bom.package_ident(parent_id), - self.bom.package_ident(package_set[product]), - relationship, - ) - - def get_spdx(self): - if not self.sbom_complete: - self.bom.showRelationship() - self.sbom_complete = True - return self.bom.getBOM() - - def get_relationships(self): - return self.bom.getRelationships() - - def get_cyclonedx(self): - return self.bom.getBOM() - - def generate_cyclonedx(self, project_name, packages): - self.bom.generateDocumentHeader(project_name) - # Process list of packages - id = 1 - package_set = {} - for package in packages: - product = package[1] - version = package[2] - supplier = package[3] - licence = package[4] - parent = package[0].lower() - if product not in package_set: - package_set[product] = str(id) + "-" + product - if parent == "-": - type = "application" - else: - type = "library" - self.bom.generateComponent( - package_set[product], type, product, supplier, version, licence - ) - if parent != "-": - self.bom.generateRelationship( - package_set[parent], package_set[product] - ) - id = id + 1 - else: - if parent != "-": - self.bom.generateRelationship( - package_set[parent], package_set[product] - ) diff --git a/sbom4python/output.py b/sbom4python/output.py deleted file mode 100644 index 3f66296..0000000 --- a/sbom4python/output.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (C) 2022 Anthony Harrison -# SPDX-License-Identifier: Apache-2.0 - -""" Set up Output Formatting """ - -import json - - -class OutputManager: - """Helper class for managing output to file and console.""" - - def __init__(self, out_type="file", filename=None): - self.out_type = out_type - self.filename = filename - if self.out_type == "file": - self.file_handle = open(filename, "w") - else: - self.file_handle = None - - def close(self): - # print("close...") - if self.out_type == "file": - # print("close file", self.file_handle) - self.file_handle.close() - - def file_out(self, message): - self.file_handle.write(message + "\n") - - def console_out(self, message): - print(message) - - def show(self, message): - if self.out_type == "file": - self.file_out(message) - else: - self.console_out(message) - - -class SBOMOutput: - """Output manager for SBOM data.""" - - def __init__(self, filename="console", output_format="tag"): - self.filename = filename - self.output_format = output_format - self.format_process = { - "tag": self.format_tag_data, - "json": self.format_json_data, - "xml": self.format_tag_data, - "dot": self.format_tag_data, - } - self.type = "console" - if self.filename != "": - self.type = "file" - self.output_manager = OutputManager(self.type, self.filename) - - def format_json_data(self, data): - json_data = json.dumps(data, indent=2) - self.send_output(json_data) - - def format_tag_data(self, dataset): - for data_item in dataset: - self.send_output(data_item) - - def send_output(self, data): - self.output_manager.show(data) - - def generate_output(self, dataset): - self.format_process[self.output_format](dataset) - # print("about to close") - self.output_manager.close() - # print("closed") diff --git a/sbom4python/scanner.py b/sbom4python/scanner.py index ad7992b..1eeae79 100644 --- a/sbom4python/scanner.py +++ b/sbom4python/scanner.py @@ -2,22 +2,38 @@ # SPDX-License-Identifier: Apache-2.0 import subprocess +import pathlib +import re + +from lib4sbom.data.package import SBOMPackage +from lib4sbom.data.relationship import SBOMRelationship +from lib4sbom.license import LicenseScanner +from sbom4files.filescanner import FileScanner + +import unicodedata class SBOMScanner: """ - Simple SBOM File Scanner. + Simple SBOM Generator for Python module. """ - def __init__(self, debug): + def __init__(self, debug, include_file=False, exclude_license=False): self.record = [] self.debug = debug + self.include_file = include_file + self.include_license = exclude_license + self.sbom_package = SBOMPackage() + self.sbom_relationship = SBOMRelationship() + self.license = LicenseScanner() + self.sbom_files = {} + self.sbom_packages = {} + self.sbom_relationships = [] + self.parent = "NOT_DEFINED" + self.file_scanner = FileScanner() - def set_module(self, module): - self.module = module - self.module_valid = False - if self.debug: - print(f"Module: {self.module}") + def set_parent(self, module): + self.parent = f"Python-{module}" def run_program(self, command_line): # Remove any null bytes @@ -27,53 +43,133 @@ def run_program(self, command_line): res = subprocess.run(params, capture_output=True, text=True) return res.stdout.splitlines() - def process_module(self): - out = self.run_program(f"pip show {self.module}") + def license_ident(self, license): + if not self.include_license and len(license) > 0: + if license != "UNKNOWN": + derived_license = self.license.find_license(license) + if derived_license != "UNKNOWN": + return derived_license + # Not an SPDX License id + return license + return "NOASSERTION" + + def _format_supplier(self, supplier_info, include_email=True): + # See https://stackoverflow.com/questions/1207457/convert-a-unicode-string-to-a-string-in-python-containing-extra-symbols + # And convert byte object to a string + name_str = unicodedata.normalize('NFKD', supplier_info).encode('ascii', 'ignore').decode("utf-8") + # Get names + names = re.findall(r"[a-zA-Z\.\]+ [A-Za-z]+ ", name_str) + # Get email addresses + # Use RFC-5322 compliant regex (https://regex101.com/library/6EL6YF) + emails = re.findall( + r"((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))", + supplier_info, + ) + supplier = " ".join(n for n in names) + if include_email and len(emails) > 0: + # Only one email can be specified, so choose last one + supplier = supplier + "(" + emails[-1] + ")" + return re.sub(" +", " ", supplier.strip()) + + def process_module(self, module, parent="-"): + if self.debug: + print(f"Process Module {module}") + out = self.run_program(f"pip show {module}") # If module not found, no metadata returned if len(out) > 0: self.metadata = {} - self.module_valid = True for line in out: entry = line.split(":") - self.metadata[entry[0]] = entry[1].lstrip() + # store all data after keyword + self.metadata[entry[0]] = line.split(f"{entry[0]}:", 1)[1].strip().rstrip("\n") + if self.debug: + print (f"Metadata for {module}\n{self.metadata}") + self.sbom_package.initialise() + package = self.get("Name").lower().replace("_", "-") + version = self.get("Version") + self.sbom_package.set_name(package) + self.sbom_package.set_version(version) + self.sbom_package.set_filesanalysis(self.include_file) + license = self.license_ident(self.get("License")) + self.sbom_package.set_licensedeclared(license) + self.sbom_package.set_licenseconcluded(license) + supplier = self.get("Author") + " " + self.get("Author-email") + if len(supplier.split()) > 3: + self.sbom_package.set_supplier("Organization", self._format_supplier(supplier)) + elif len(supplier) > 1: + self.sbom_package.set_supplier("Person", self._format_supplier(supplier)) + else: + self.sbom_package.set_supplier("UNKNOWN", "NOASSERTION") + if self.get("Home-page") != "": + self.sbom_package.set_homepage(self.get("Home-page")) + if self.get("Summary") != "": + self.sbom_package.set_summary(self.get('Summary')) + self.sbom_package.set_downloadlocation(f'https://pypi.org/project/{self.get("Name")}/{version}') + # External references + self.sbom_package.set_externalreference("PACKAGE-MANAGER", "purl", f"pkg:pypi/{package}@{version}") + if len(supplier) > 1: + component_supplier = self._format_supplier(supplier, include_email=False) + self.sbom_package.set_externalreference("SECURITY", "cpe23Type", f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{version}:*:*:*:*:*:*:*") + # Store package data + self.sbom_packages[(self.sbom_package.get_name(), self.sbom_package.get_value('version'))] = self.sbom_package.get_package() + # Add relationship + self.sbom_relationship.initialise() + if parent != "-": + self.sbom_relationship.set_relationship(parent.lower(), "DEPENDS_ON", package) + else: + self.sbom_relationship.set_relationship(self.parent, "DESCRIBES", package) + self.sbom_relationships.append(self.sbom_relationship.get_relationship()) + if self.include_file: + directory_location = f'{self.get("Location")}/{self.get("Name").lower().replace("-","_")}' + file_dir = pathlib.Path(directory_location) + if file_dir.exists(): + filtered = [x for x in file_dir.glob("**/*") if x.name.endswith(".py")] + else: + # Module is only a single file + filtered = [pathlib.Path(f'{self.get("Location")}/{self.get("Name").lower().replace("-","_")}.py')] + for entry in filtered: + if self.debug: + print (f'Analyse file in {entry}') + if self.file_scanner.scan_file(entry): + self.sbom_files[self.file_scanner.get_name()] = self.file_scanner.get_file() + # Add relationship + self.sbom_relationship.initialise() + self.sbom_relationship.set_relationship(package, "CONTAINS", self.file_scanner.get_name()) + self.sbom_relationship.set_relationship_id(self.sbom_package.get_value("id"), self.file_scanner.get_value("id")) + self.sbom_relationship.set_target_type("file") + self.sbom_relationships.append(self.sbom_relationship.get_relationship()) elif self.debug: - print(f"Module {self.module} not found") - - def add(self, entry): - if entry not in self.record: - self.record.append(entry) + print(f"Module {module} not found") + return (len(out) > 0) def get(self, attribute): if attribute in self.metadata: return self.metadata[attribute].lstrip() return "" - def get_record(self): - return self.record + def get_files(self): + return self.sbom_files - def valid_module(self): - return self.module_valid + def get_packages(self): + return self.sbom_packages - def show_record(self): - for r in self.record: - print(r) + def get_relationships(self): + if self.debug: + print (self.sbom_relationships) + return self.sbom_relationships + + def get_parent(self): + return self.parent def analyze(self, parent, dependencies): if len(dependencies) == 0: return else: for r in dependencies.split(","): - self.set_module(r) - self.process_module() - # Only process module for dependencies if valid - if self.valid_module(): - self.add( - [ - parent.lower().replace("_", "-"), - self.get("Name").lower().replace("_", "-"), - self.get("Version"), - self.get("Author") + " " + self.get("Author-email"), - self.get("License"), - ] - ) + if self.process_module(r, parent): self.analyze(r.strip(), self.get("Requires")) + + def process_python_module(self, module_name): + self.set_parent(module_name) + if self.process_module(module_name): + self.analyze(self.get("Name"), self.get("Requires")) diff --git a/sbom4python/spdxgenerator.py b/sbom4python/spdxgenerator.py deleted file mode 100644 index 6c5b6b5..0000000 --- a/sbom4python/spdxgenerator.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (C) 2023 Anthony Harrison -# SPDX-License-Identifier: Apache-2.0 - -import re -import uuid -from datetime import datetime - -from sbom4python.license import LicenseScanner - - -class SPDXGenerator: - """ - Generate SPDX Tag/Value SBOM. - """ - - SPDX_VERSION = "SPDX-2.2" - DATA_LICENSE = "CC0-1.0" - SPDX_NAMESPACE = "http://spdx.org/spdxdocs/" - SPDX_LICENSE_VERSION = "3.9" - SPDX_PROJECT_ID = "SPDXRef-DOCUMENT" - PACKAGE_PREAMBLE = "SPDXRef-Package-" - LICENSE_PREAMBLE = "LicenseRef-" - - def __init__( - self, - include_license: False, - spdx_format="tag", - application="sbom4python", - version="0.1", - ): - - self.package_id = 0 - self.include_license = include_license - self.license = LicenseScanner() - self.relationship = [] - self.format = spdx_format - self.application = application - self.application_version = version - if self.format == "tag": - self.doc = [] - else: - self.doc = {} - self.component = [] - self.relationships = [] - self.include_purl = False - - def set_purl(self, package_manager): - self.include_purl = True - self.package_manager = package_manager - - def show(self, message): - self.doc.append(message) - - def getBOM(self): - if self.format != "tag": - # Add subcomponents to SBOM - self.doc["packages"] = self.component - self.doc["relationships"] = self.relationships - return self.doc - - def getRelationships(self): - return self.relationship - - def generateTag(self, tag, value): - self.show(tag + ": " + value) - - def generateComment(self, comment): - self.show("##### " + comment) - - def generateTime(self): - # Generate data/time label in format YYYY-MM-DDThh:mm:ssZ - return datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") - - def generateTagDocumentHeader(self, project_name): - # Geerate SPDX Document Header - self.generateTag("SPDXVersion", self.SPDX_VERSION) - self.generateTag("DataLicense", self.DATA_LICENSE) - self.generateTag("SPDXID", self.SPDX_PROJECT_ID) - # Project name mustn't have spaces in. Covert spaces to '-' - self.generateTag("DocumentName", project_name.replace(" ", "-")) - self.generateTag( - "DocumentNamespace", - self.SPDX_NAMESPACE - + project_name.replace(" ", "-") - + "-" - + str(uuid.uuid4()), - ) - self.generateTag("LicenseListVersion", self.license.get_license_version()) - self.generateTag( - "Creator: Tool", self.application + "-" + self.application_version - ) - self.generateTag("Created", self.generateTime()) - self.generateTag( - "CreatorComment", - "This document has been automatically generated.", - ) - return self.SPDX_PROJECT_ID - - def generateJSONDocumentHeader(self, project_name): - # Generate SPDX Document Header - self.doc["SPDXID"] = self.SPDX_PROJECT_ID - self.doc["spdxVersion"] = self.SPDX_VERSION - creation_info = dict() - creation_info["comment"] = "This document has been automatically generated." - creation_info["creators"] = [ - "Tool: " + self.application + "-" + self.application_version - ] - creation_info["created"] = self.generateTime() - creation_info["licenseListVersion"] = self.license.get_license_version() - self.doc["creationInfo"] = creation_info - # Project name mustn't have spaces in. Covert spaces to '-' - self.doc["name"] = project_name.replace(" ", "-") - self.doc["dataLicense"] = self.DATA_LICENSE - self.doc["documentNamespace"] = ( - self.SPDX_NAMESPACE - + project_name.replace(" ", "-") - + "-" - + str(uuid.uuid4()) - ) - # self.doc["documentDescribes"]=[self.SPDX_PROJECT_ID] - return self.SPDX_PROJECT_ID - - def generateDocumentHeader(self, project_name): - if self.format == "tag": - return self.generateTagDocumentHeader(project_name) - else: - return self.generateJSONDocumentHeader(project_name) - - def package_ident(self, id): - # Only add preamble if not parent document - if id != self.SPDX_PROJECT_ID: - return self.PACKAGE_PREAMBLE + str(id) - return str(id) - - def license_ident(self, license): - if not self.include_license: - if license != "UNKNOWN": - derived_license = self.license.find_license(license) - if derived_license != "UNKNOWN": - return derived_license - return "NOASSERTION" - - def _format_supplier(self, supplier_info, include_email=True): - # Get names - names = re.findall(r"[a-zA-Z\.\]+ [A-Za-z]+ ", supplier_info) - # Get email addresses - # Use RFC-5322 compliant regex (https://regex101.com/library/6EL6YF) - emails = re.findall( - r"((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))", - supplier_info, - ) - supplier = " ".join(n for n in names) - if include_email and len(emails) > 0: - # Only one email can be specified, so choose last one - supplier = supplier + "(" + emails[-1] + ")" - return re.sub(" +", " ", supplier.strip()) - - def generateTagPackageDetails( - self, package, id, version, supplier, license, parent_id, relationship - ): - self.generateComment("\n") - self.generateTag("PackageName", package) - package_id = self.package_ident(id) - self.generateTag("SPDXID", package_id) - # Attempt to detect an organization - if len(supplier.split()) > 3: - self.generateTag( - "PackageSupplier: Organization", self._format_supplier(supplier) - ) - elif len(supplier) > 1: - self.generateTag("PackageSupplier: Person", self._format_supplier(supplier)) - else: - self.generateTag("PackageSupplier", "NOASSERTION") - self.generateTag("PackageVersion", version) - self.generateTag("PackageDownloadLocation", "NOASSERTION") - self.generateTag("FilesAnalyzed", "false") - self.generateComment("Reported license " + license) - self.generateTag("PackageLicenseConcluded", self.license_ident(license)) - self.generateTag("PackageLicenseDeclared", self.license_ident(license)) - self.generateTag("PackageCopyrightText", "NOASSERTION") - if self.include_purl: - self.generateTag( - "ExternalRef", - f"PACKAGE-MANAGER purl pkg:{self.package_manager}/{package}@{version}", - ) - if len(supplier) > 1: - component_supplier = self._format_supplier(supplier, include_email=False) - self.generateTag( - "ExternalRef", - f"SECURITY cpe23Type cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{version}:*:*:*:*:*:*:*", - ) - self.generateRelationship( - self.package_ident(parent_id), package_id, relationship - ) - - def generateJSONPackageDetails( - self, package, id, version, supplier, license, parent_id, relationship - ): - component = dict() - package_id = self.package_ident(id) - component["SPDXID"] = package_id - component["name"] = package - component["versionInfo"] = version - # Attempt to detect an organization - if len(supplier.split()) > 2: - component["supplier"] = "Organization: " + self._format_supplier(supplier) - elif len(supplier) > 1: - component["supplier"] = "Person: " + self._format_supplier(supplier) - else: - component["supplier"] = "NOASSERTION" - component["downloadLocation"] = "NONE" - component["filesAnalyzed"] = False - component["licenseConcluded"] = self.license_ident(license) - component["licenseDeclared"] = self.license_ident(license) - component["copyrightText"] = "NOASSERTION" - if self.include_purl: - purl_data = dict() - purl_data["referenceCategory"] = "PACKAGE-MANAGER" - purl_data[ - "referenceLocator" - ] = f"pkg:{self.package_manager}/{package}@{version}" - purl_data["referenceType"] = "purl" - component["externalRefs"] = [purl_data] - if len(supplier) > 1: - component_supplier = self._format_supplier(supplier, include_email=False) - cpe_data = dict() - cpe_data["referenceCategory"] = "SECURITY" - cpe_data[ - "referenceLocator" - ] = f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{version}:*:*:*:*:*:*:*" - cpe_data["referenceType"] = "cpe23Type" - if "externalRefs" in component: - component["externalRefs"].append(cpe_data) - else: - component["externalRefs"] = [cpe_data] - self.component.append(component) - self.generateRelationship( - self.package_ident(parent_id), package_id, relationship - ) - - def generatePackageDetails( - self, package, id, version, supplier, license, parent_id, relationship - ): - if self.format == "tag": - self.generateTagPackageDetails( - package, id, version, supplier, license, parent_id, relationship - ) - else: - self.generateJSONPackageDetails( - package, id, version, supplier, license, parent_id, relationship - ) - - def generateRelationship(self, from_id, to_id, relationship_type): - self.relationship.append([from_id, to_id, relationship_type]) - - def showRelationship(self): - self.relationship.sort() - for r in self.relationship: - if self.format == "tag": - self.generateTag("Relationship", r[0] + r[2] + r[1]) - else: - relation = dict() - relation["spdxElementId"] = r[0] - relation["relatedSpdxElement"] = r[1] - relation["relationshipType"] = r[2].strip() - self.relationships.append(relation) diff --git a/sbom4python/version.py b/sbom4python/version.py index c366169..6528e7e 100644 --- a/sbom4python/version.py +++ b/sbom4python/version.py @@ -1,4 +1,4 @@ # Copyright (C) 2023 Anthony Harrison # SPDX-License-Identifier: Apache-2.0 -VERSION: str = "0.7.0" +VERSION: str = "0.8.0"