Skip to content

Commit 2b8e2c1

Browse files
committed
feat: compare SBOMs and show components only present in target SBOM
Signed-off-by: badrikesh prusty <[email protected]>
1 parent 0337d3a commit 2b8e2c1

File tree

2 files changed

+206
-0
lines changed

2 files changed

+206
-0
lines changed

src/debsbom/cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from .commands.source_merge import SourceMergeCmd
1919
from .commands.repack import RepackCmd
2020
from .commands.export import ExportCmd
21+
from .commands.compare import CompareCmd
2122

2223
# Attempt to import optional download dependencies to check their availability.
2324
# The success or failure of these imports determines if download features are enabled.
@@ -64,6 +65,7 @@ def setup_parser():
6465
)
6566
RepackCmd.setup_parser(subparser.add_parser("repack", help="repack sources and sbom"))
6667
ExportCmd.setup_parser(subparser.add_parser("export", help="export SBOM as graph"))
68+
CompareCmd.setup_parser(subparser.add_parser("compare", help="compare SBOMs and list new components"))
6769

6870
return parser
6971

@@ -97,6 +99,8 @@ def main():
9799
ExportCmd.run(args)
98100
elif args.cmd == "merge":
99101
MergeCmd.run(args)
102+
elif args.cmd == "compare":
103+
CompareCmd.run(args)
100104
except DistroArchUnknownError as e:
101105
logger.error(f"debsbom: error: {e}. Set --distro-arch to dpkg architecture (e.g. amd64)")
102106
sys.exit(-2)

src/debsbom/commands/compare.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
# Copyright (C) 2025 Siemens
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import json
6+
import os
7+
import uuid
8+
from .input import SbomInput
9+
from datetime import datetime
10+
11+
import logging
12+
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class CompareCmd(SbomInput):
    """
    Compare two SBOMs and generate a new SBOM containing only the additional
    components found in the target.

    Both documents must be JSON and of the same format (SPDX or CycloneDX).
    Components are keyed by their purl, falling back to ``name@version`` when
    no purl is recorded. A target component is reported when its key is absent
    from the base SBOM, or when both documents record a SHA-256 digest for that
    key and the digests differ.
    """

    @classmethod
    def run(cls, args):
        """
        Compare ``args.base_sbom`` with ``args.target_sbom`` and write the
        resulting SBOM as JSON to ``args.out_file``.

        The parent directory of the output file is created when missing.

        Raises:
            ValueError: if the format of either document cannot be detected,
                or if the two documents are of different formats.
        """
        # Parse each file exactly once; the parsed documents are reused for
        # format detection, indexing and metadata lookup.
        base_sbom = cls._read_json(args.base_sbom)
        target_sbom = cls._read_json(args.target_sbom)

        base_sbom_fmt = cls.detect_sbom_format(base_sbom)
        target_sbom_fmt = cls.detect_sbom_format(target_sbom)

        if not base_sbom_fmt or not target_sbom_fmt:
            raise ValueError("can not detect SBOM format for one or both files")

        if base_sbom_fmt != target_sbom_fmt:
            raise ValueError("can not compare mixed SPDX and CycloneDX documents")

        if target_sbom_fmt == "spdx":
            extra_pkgs = cls.compare_items(
                cls._index_spdx_packages(base_sbom),
                cls._index_spdx_packages(target_sbom),
                "pkg",
            )
            # The creation info is taken from the base (reference) document;
            # a generated one is used only when the base has none.
            ref_creation_info = base_sbom.get(
                "creationInfo",
                {
                    "creators": ["Tool: sbom-diff-generator 1.0"],
                    "created": cls._utc_timestamp(),
                },
            )
            result = cls.build_extra_spdx(extra_pkgs, ref_creation_info)
        elif target_sbom_fmt == "cdx":
            extra_components = cls.compare_items(
                cls._index_cdx_components(base_sbom),
                cls._index_cdx_components(target_sbom),
                "component",
            )
            result = cls.build_extra_cdx(extra_components, target_sbom)
        else:
            # Defensive: only reachable if detect_sbom_format() learns a new
            # format that is not handled above.
            raise ValueError(f"Unsupported SBOM format: {target_sbom_fmt}")

        out_dir = os.path.dirname(args.out_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(args.out_file, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=4)

    @staticmethod
    def _read_json(path):
        """Parse the JSON document stored at ``path`` (decoded as UTF-8)."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    @classmethod
    def detect_sbom_format(cls, data):
        """
        Detect SBOM format based on known top-level keys.
        Returns 'spdx' or 'cdx' or None.
        """
        # A JSON root that is not an object (list, string, ...) is no SBOM.
        if not isinstance(data, dict):
            return None
        if "spdxVersion" in data:
            return "spdx"
        bom_format = data.get("bomFormat")
        if isinstance(bom_format, str) and bom_format.lower() == "cyclonedx":
            return "cdx"
        return None

    @classmethod
    def _index_spdx_packages(cls, data):
        """
        Index the packages of a parsed SPDX document.

        Returns a dict mapping purl (or ``name@version`` when the package has
        no purl external reference) to ``{"pkg": <package>, "sha256": <digest
        or None>}``. When several packages share a key, the last one wins.
        """
        packages = {}
        for pkg in data.get("packages") or []:
            purl = next(
                (
                    ref["referenceLocator"]
                    for ref in pkg.get("externalRefs") or []
                    if ref.get("referenceType") == "purl"
                    and ref.get("referenceLocator")
                ),
                None,
            )
            if not purl:
                version = pkg.get("versionInfo", "")
                purl = f"{pkg.get('name')}@{version}"

            sha256 = next(
                (
                    c["checksumValue"]
                    for c in pkg.get("checksums") or []
                    if (c.get("algorithm") or "").upper() == "SHA256"
                    and c.get("checksumValue")
                ),
                None,
            )

            packages[purl] = {"pkg": pkg, "sha256": sha256}

        return packages

    @classmethod
    def load_spdx_sbom(cls, path):
        """
        Load an SPDX JSON document from ``path``.

        Returns a tuple ``(packages, data)`` where ``packages`` is keyed by
        purl or fallback name@version and ``data`` is the parsed document.
        """
        data = cls._read_json(path)
        return cls._index_spdx_packages(data), data

    @classmethod
    def build_extra_spdx(cls, extra_pkgs, ref_creation_info):
        """
        Build minimal SPDX 2.3 JSON document.

        ``extra_pkgs`` becomes the ``packages`` list and ``ref_creation_info``
        the ``creationInfo`` entry; a fresh UUID makes the namespace unique.
        """
        return {
            "spdxVersion": "SPDX-2.3",
            "SPDXID": "SPDXRef-DOCUMENT",
            "name": "Extra Components SBOM",
            "dataLicense": "CC0-1.0",
            "documentNamespace": f"https://example.org/spdx/extra-{uuid.uuid4()}",
            "creationInfo": ref_creation_info,
            "packages": extra_pkgs,
        }

    @classmethod
    def _index_cdx_components(cls, data):
        """
        Index the top-level components of a parsed CycloneDX document.

        Returns a dict mapping purl (or ``name@version`` when the component
        has no purl) to ``{"component": <component>, "sha256": <digest or
        None>}``. When several components share a key, the last one wins.
        """
        components = {}
        for comp in data.get("components") or []:
            purl = comp.get("purl") or f"{comp.get('name')}@{comp.get('version', '')}"
            sha256 = next(
                (
                    h["content"]
                    for h in comp.get("hashes") or []
                    if (h.get("alg") or "").upper() == "SHA-256"
                    and h.get("content")
                ),
                None,
            )
            components[purl] = {"component": comp, "sha256": sha256}

        return components

    @classmethod
    def load_cdx_sbom(cls, path):
        """
        Load a CycloneDX JSON document from ``path``.

        Returns a tuple ``(components, data)`` where ``components`` is keyed by
        purl or fallback name@version and ``data`` is the parsed document.
        """
        data = cls._read_json(path)
        return cls._index_cdx_components(data), data

    @classmethod
    def build_extra_cdx(cls, extra_components, new_metadata=None):
        """
        Build minimal CycloneDX 1.5 JSON SBOM.

        The ``metadata`` entry is copied from ``new_metadata["metadata"]`` when
        available; otherwise a minimal one with the current UTC timestamp is
        generated.
        """
        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.5",
            "version": 1,
            "serialNumber": f"urn:uuid:{uuid.uuid4()}",
            "components": extra_components,
        }
        if new_metadata and "metadata" in new_metadata:
            sbom["metadata"] = new_metadata["metadata"]
        else:
            sbom["metadata"] = {
                "timestamp": cls._utc_timestamp(),
                "tools": [{"name": "sbom-diff-generator", "version": "1.0"}],
            }
        return sbom

    @classmethod
    def compare_items(cls, base_sbom_comp, target_sbom_comp, key_name):
        """
        Generic comparison for SPDX or CDX items keyed by purl.

        Returns the list of ``target_sbom_comp[key][key_name]`` entries whose
        key is missing from ``base_sbom_comp``, or whose SHA-256 differs from
        the base entry. Digests are compared case-insensitively and ignoring
        surrounding whitespace; a missing digest on either side is not treated
        as a difference.
        """
        extra = []
        for key, new_info in target_sbom_comp.items():
            base_info = base_sbom_comp.get(key)
            if base_info is None:
                extra.append(new_info[key_name])
                continue

            new_sha = (new_info["sha256"] or "").lower().strip()
            ref_sha = (base_info.get("sha256") or "").lower().strip()
            if ref_sha and new_sha and ref_sha != new_sha:
                extra.append(new_info[key_name])
        return extra

    @classmethod
    def setup_parser(cls, parser):
        """
        Register the command line arguments of the ``compare`` command.

        Adds the shared arguments provided by ``parser_add_sbom_input_args``
        plus the base SBOM, target SBOM and output file options.
        """
        cls.parser_add_sbom_input_args(parser)
        parser.add_argument(
            "-b",
            "--base-sbom",
            required=True,
            help="Path to the base (reference) SBOM file",
        )

        parser.add_argument(
            "-n",
            "--target-sbom",
            required=True,
            help="Path to the target (new) SBOM file",
        )

        parser.add_argument(
            "-o",
            "--out-file",
            default="uncleared_components.json",
            help="Path to the output JSON file (default: uncleared_components.json)",
        )

0 commit comments

Comments
 (0)