Add debian legacy cache processing (#117)
* allow for empty cvss vectors for legacy data

Signed-off-by: Alex Goodman <[email protected]>

* add support for processing legacy debian records

Signed-off-by: Alex Goodman <[email protected]>

* add unit tests for legacy debian data

Signed-off-by: Alex Goodman <[email protected]>

* add debian cache to quality gate to pick up on legacy results

Signed-off-by: Alex Goodman <[email protected]>

* fix linting

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
wagoodman authored Mar 14, 2023
1 parent 2385b06 commit a0fb062
Showing 7 changed files with 383 additions and 12 deletions.
5 changes: 2 additions & 3 deletions schema/vulnerability/os/schema-1.0.0.json
@@ -142,15 +142,14 @@
         "type": "object",
         "properties": {
           "Score": {
-            "type": "integer"
+            "type": "number"
           },
           "Vectors": {
             "type": "string"
          }
         },
         "required": [
-          "Score",
-          "Vectors"
+          "Score"
         ]
       }
     }
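Why this schema change matters: the legacy Debian records predate the current schema, so their CVSSv2 metadata can carry a fractional score and omit the vector string entirely. A minimal sketch of the relaxed validation, using the third-party jsonschema package for illustration (this commit itself relies on vunnel's own schema handling):

    import jsonschema

    # the CVSSv2 sub-schema as of this commit: "Score" may be fractional,
    # and "Vectors" is optional
    cvssv2 = {
        "type": "object",
        "properties": {
            "Score": {"type": "number"},   # previously "integer"
            "Vectors": {"type": "string"},
        },
        "required": ["Score"],             # previously ["Score", "Vectors"]
    }

    # a legacy-style record: fractional score, no vector string at all
    jsonschema.validate({"Score": 7.5}, cvssv2)  # passes under the new schema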
2 changes: 2 additions & 0 deletions src/vunnel/providers/debian/__init__.py
@@ -45,6 +45,8 @@ def __init__(self, root: str, config: Config | None = None):
         )
 
         # this provider requires the previous state from former runs
+        # note: we MUST keep the input directory, since it may have out-of-band updates to support
+        # legacy vulns that are not in the Debian security tracker anymore.
         provider.disallow_existing_input_policy(config.runtime)
 
     @classmethod
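The new comment documents a policy decision rather than new behavior: the provider refuses runtime configurations that would wipe its input directory, since the legacy records arrive out of band and cannot be re-downloaded. A rough sketch of what such a guard might look like; the names and structure below are illustrative assumptions, not vunnel's actual implementation:

    # hypothetical guard: reject configs that delete existing input between runs
    def disallow_existing_input_policy(runtime_cfg) -> None:
        if getattr(runtime_cfg, "existing_input", None) == "delete":
            raise ValueError(
                "this provider must keep its input directory between runs "
                "(it holds out-of-band legacy records)"
            )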
40 changes: 40 additions & 0 deletions src/vunnel/providers/debian/parser.py
@@ -6,6 +6,7 @@
 import os
 import re
 from collections import namedtuple
+from typing import Any
 
 import requests
 
@@ -51,6 +52,7 @@ def __init__(self, workspace, download_timeout=125, logger=None, distro_map=None
         self.debian_distro_map = distro_map
         self.json_file_path = os.path.join(workspace.input_path, self._json_file_)
         self.dsa_file_path = os.path.join(workspace.input_path, self._dsa_file_)
+        self.legacy_records_path = os.path.join(self.workspace.input_path, "legacy")
         self.urls = [self._json_url_, self._dsa_url_]
 
         if not logger:
@@ -453,6 +455,40 @@ def _normalize_json(self, ns_cve_dsalist=None): # noqa: PLR0912,PLR0915
 
         return vuln_records
 
+    def _get_legacy_records(self):
+        legacy_records = {}
+
+        def process_file(contents: list[dict[str, Any]]) -> None:
+            for record in contents:
+                relno = record["Vulnerability"]["NamespaceName"].split(":")[-1]
+                vid = record["Vulnerability"]["Name"]
+                if relno not in legacy_records:
+                    legacy_records[relno] = {}
+
+                # ensure results are compliant with the current schema
+                cvss_metadata = record["Vulnerability"].get("Metadata", {}).get("NVD", {}).get("CVSSv2", {})
+                if cvss_metadata:
+                    if cvss_metadata["Vectors"] is None:
+                        del cvss_metadata["Vectors"]
+                    record["Vulnerability"]["Metadata"]["NVD"]["CVSSv2"] = cvss_metadata
+
+                # write the record back
+                legacy_records[relno][vid] = record
+
+        # read every json file in the legacy directory
+        for root, _dirs, files in os.walk(self.legacy_records_path):
+            for file in files:
+                if file.endswith(".json") and file.startswith("vulnerabilities"):
+                    with open(os.path.join(root, file)) as f:
+                        process_file(json.load(f))
+
+        if legacy_records:
+            self.logger.info(f"found existing legacy data for the following releases: {list(legacy_records.keys())}")
+        else:
+            self.logger.info("no existing legacy data found")
+
+        return legacy_records
+
     def get(self):
         # download the files
         self._download_json()
@@ -464,6 +500,10 @@ def get(self):
         # normalize json file
         vuln_records = self._normalize_json(ns_cve_dsalist=ns_cve_dsalist)
 
+        # fetch records from legacy (if they exist)
+        legacy_records = self._get_legacy_records()
+        vuln_records.update(legacy_records)
+
         if vuln_records:
             for relno, vuln_dict in vuln_records.items():
                 for vid, vuln_record in vuln_dict.items():
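The heart of _get_legacy_records() is a small compliance pass: records written under the old schema may contain "Vectors": null, which the updated schema (string or absent, never null) would reject, so the null key is dropped before the record is re-indexed. A standalone sketch of that transformation on a fabricated record:

    # fabricated legacy record, shaped like the provider's per-CVE output
    record = {
        "Vulnerability": {
            "Name": "CVE-2015-0001",
            "NamespaceName": "debian:8",
            "Metadata": {"NVD": {"CVSSv2": {"Score": 5.0, "Vectors": None}}},
        }
    }

    cvss = record["Vulnerability"].get("Metadata", {}).get("NVD", {}).get("CVSSv2", {})
    if cvss and cvss["Vectors"] is None:
        del cvss["Vectors"]  # null fails validation; an absent key is allowed
    assert "Vectors" not in cvss

    relno = record["Vulnerability"]["NamespaceName"].split(":")[-1]  # -> "8"
    vid = record["Vulnerability"]["Name"]                            # -> "CVE-2015-0001"
    # the cleaned record is then stored as legacy_records[relno][vid]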
4 changes: 4 additions & 0 deletions tests/quality/config.yaml
@@ -63,6 +63,10 @@ tests:
   # - docker.io/centos:6@sha256:3688aa867eb84332460e172b9250c9c198fdfd8d987605fd53f246f498c60bcf
 
   - provider: debian
+    # ideally we would not use cache; however, in order to test that we are properly keeping the
+    # processing of legacy information that is in the debian data cache (for debian 7, 8, and 9),
+    # we must test with cache enabled.
+    use_cache: true
     images:
       - docker.io/debian:7@sha256:81e88820a7759038ffa61cff59dfcc12d3772c3a2e75b7cfe963c952da2ad264
 
22 changes: 14 additions & 8 deletions tests/quality/configure.py
@@ -121,6 +121,7 @@ def provider_data_source(self, providers: list[str]) -> tuple[list[str], list[st
         uncached_providers = []
 
         tests = []
+        providers_under_test_that_require_cache = set()
         for provider in providers:
             test = self.test_configuration_by_provider(provider)
             if test is None:
@@ -129,10 +130,13 @@ def provider_data_source(self, providers: list[str]) -> tuple[list[str], list[st
 
             tests.append(test)
 
+            # note: we always include the subject in the uncached providers, but also add it to the cached providers.
+            # the subject must always be run even when cache is involved.
+            uncached_providers.append(test.provider)
             if test.use_cache:
+                providers_under_test_that_require_cache.add(test.provider)
                 cached_providers.append(test.provider)
-            else:
-                uncached_providers.append(test.provider)
 
             if test.additional_providers:
                 for additional_provider in test.additional_providers:
                     if additional_provider.use_cache:
@@ -141,7 +145,7 @@ def provider_data_source(self, providers: list[str]) -> tuple[list[str], list[st
                         uncached_providers.append(additional_provider.name)
 
         for provider in uncached_providers:
-            if provider in cached_providers:
+            if provider in cached_providers and provider not in providers_under_test_that_require_cache:
                 cached_providers.remove(provider)
 
         return cached_providers, uncached_providers, self.yardstick_application_config(tests)
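The net effect for a provider under test with use_cache: true (debian, per the config above) is that it now lands in both lists: its cache is restored and the provider still runs. Previously the final dedup loop would have stripped it from the cached list. A toy walk-through of the new bookkeeping, with a simplified stand-in for the real test configuration type:

    from dataclasses import dataclass

    @dataclass
    class Test:  # simplified stand-in for illustration only
        provider: str
        use_cache: bool = False

    cached, uncached, requires_cache = [], [], set()
    for test in [Test("debian", use_cache=True), Test("nvd")]:
        uncached.append(test.provider)         # subjects are always run...
        if test.use_cache:
            requires_cache.add(test.provider)  # ...and may also keep their cache
            cached.append(test.provider)

    for provider in uncached:                  # dedupe, sparing cache-requiring subjects
        if provider in cached and provider not in requires_cache:
            cached.remove(provider)

    print(cached, uncached)  # ['debian'] ['debian', 'nvd'] -- debian is in both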
@@ -407,6 +411,8 @@ def configure(cfg: Config, provider_names: list[str]):
 
     cached_providers, uncached_providers, yardstick_app_cfg = cfg.provider_data_source(provider_names)
 
+    logging.info(f"providers uncached={uncached_providers!r} cached={cached_providers!r}")
+
     if not cached_providers and not uncached_providers:
         logging.error(f"no test configuration found for provider {provider_names!r}")
         return [], []
@@ -493,18 +499,18 @@ def build_db(cfg: Config):
     shutil.rmtree(data_dir, ignore_errors=True)
     shutil.rmtree(build_dir, ignore_errors=True)
 
-    # run providers
-    for provider in state.uncached_providers:
-        logging.info(f"running provider {provider!r}")
-        subprocess.run(["vunnel", "-v", "run", provider], check=True)
-
     # fetch cache for other providers
     for provider in state.cached_providers:
         logging.info(f"fetching cache for {provider!r}")
         subprocess.run(["oras", "pull", f"ghcr.io/anchore/grype-db/data/{provider}:latest"], check=True)
         subprocess.run([GRYPE_DB, "cache", "restore", "--path", cache_file], check=True)
         os.remove(cache_file)
 
+    # run providers
+    for provider in state.uncached_providers:
+        logging.info(f"running provider {provider!r}")
+        subprocess.run(["vunnel", "-v", "run", provider], check=True)
+
     logging.info("building DB")
     subprocess.run([GRYPE_DB, "build", "-v"], check=True)
     subprocess.run([GRYPE_DB, "package", "-v"], check=True)
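Reordering build_db() is what makes the two-list scheme work: the cache restore must happen before the providers run, so a subject like debian first receives its restored workspace (including the out-of-band legacy input files) and then regenerates fresh results on top of it; in the old order, the restore would have clobbered whatever the provider had just produced. A sketch of the effective sequence for the debian subject, where the cache archive name is an illustrative placeholder for the diff's cache_file variable:

    # effective command sequence after this change (illustrative)
    commands = [
        ["oras", "pull", "ghcr.io/anchore/grype-db/data/debian:latest"],      # 1. fetch cache
        ["grype-db", "cache", "restore", "--path", "grype-db-cache.tar.gz"],  # 2. seed workspace, incl. input/legacy
        ["vunnel", "-v", "run", "debian"],                                    # 3. regenerate on top of restored input
    ]
    for cmd in commands:
        print(" ".join(cmd))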