Implement provider for Chainguard Linux (#132)
* Implement provider for Chainguard Linux

Signed-off-by: Dan Luhring <[email protected]>

* Fix tests

Signed-off-by: Dan Luhring <[email protected]>

* wip: configs.yaml

Signed-off-by: Dan Luhring <[email protected]>

* Fix failing test

Signed-off-by: Dan Luhring <[email protected]>

* Use main branch of Grype for quality gate

Signed-off-by: Dan Luhring <[email protected]>

* Use latest grype and grype-db

Signed-off-by: Dan Luhring <[email protected]>

---------

Signed-off-by: Dan Luhring <[email protected]>
luhring authored Mar 28, 2023
1 parent dba31ab commit 948d8d1
Showing 16 changed files with 1,060 additions and 154 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -79,11 +79,11 @@ dev: ## Get a development shell with locally editable grype, grype-db, and vunn
 	@DEV_VUNNEL_BIN_DIR=$(ABS_BIN_DIR) .github/scripts/dev-shell.sh $(provider) $(providers)
 
 .PHONY: build-grype
-build-grype: $(TEMP_DIR) ## Build grype for local development
+build-grype: $(BIN_DIR) ## Build grype for local development
 	@cd $(GRYPE_PATH) && go build -o $(ABS_BIN_DIR)/grype .
 
 .PHONY: build-grype-db
-build-grype-db: $(TEMP_DIR) ## Build grype-db for local development
+build-grype-db: $(BIN_DIR) ## Build grype-db for local development
 	@cd $(GRYPE_DB_PATH) && go build -o $(ABS_BIN_DIR)/grype-db ./cmd/grype-db
 
 .PHONY: update-db
1 change: 1 addition & 0 deletions src/vunnel/cli/config.py
@@ -16,6 +16,7 @@ class Providers:
     alpine: providers.alpine.Config = field(default_factory=providers.alpine.Config)
     amazon: providers.amazon.Config = field(default_factory=providers.amazon.Config)
     centos: providers.centos.Config = field(default_factory=providers.centos.Config)
+    chainguard: providers.chainguard.Config = field(default_factory=providers.chainguard.Config)
     debian: providers.debian.Config = field(default_factory=providers.debian.Config)
     github: providers.github.Config = field(default_factory=providers.github.Config)
     nvd: providers.nvd.Config = field(default_factory=providers.nvd.Config)
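
Since every visible field of Providers carries a default factory, the new provider config should be reachable with no extra wiring. A minimal sketch of that, assuming the collapsed fields of Providers also default-construct:

```python
# Hypothetical check that the chainguard config is wired into the CLI config;
# assumes the remaining (collapsed) Providers fields also have defaults.
from vunnel.cli.config import Providers

cfg = Providers()
print(cfg.chainguard.request_timeout)  # 125, per the provider's Config default below
```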
2 changes: 2 additions & 0 deletions src/vunnel/providers/__init__.py
@@ -8,6 +8,7 @@
     alpine,
     amazon,
     centos,
+    chainguard,
     debian,
     github,
     nvd,
@@ -38,6 +39,7 @@
     sles.Provider.name(): sles.Provider,
     ubuntu.Provider.name(): ubuntu.Provider,
     wolfi.Provider.name(): wolfi.Provider,
+    chainguard.Provider.name(): chainguard.Provider,
 }
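
The second hunk adds the provider to the name-to-class mapping; the dict's own name is collapsed out of the diff, so the sketch below reconstructs it under a stand-in name:

```python
# Stand-in reconstruction of the mapping above (only the dict's name is assumed).
from vunnel.providers import chainguard, wolfi

registry = {
    wolfi.Provider.name(): wolfi.Provider,
    chainguard.Provider.name(): chainguard.Provider,
}

assert registry["chainguard"] is chainguard.Provider  # Provider.name() returns "chainguard"
```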
64 changes: 64 additions & 0 deletions src/vunnel/providers/chainguard/__init__.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+from vunnel import provider, result, schema
+from vunnel.providers.wolfi.parser import Parser
+
+if TYPE_CHECKING:
+    import datetime
+
+
+@dataclass
+class Config:
+    runtime: provider.RuntimeConfig = field(
+        default_factory=lambda: provider.RuntimeConfig(
+            result_store=result.StoreStrategy.SQLITE,
+            existing_results=provider.ResultStatePolicy.DELETE_BEFORE_WRITE,
+        ),
+    )
+    request_timeout: int = 125
+
+
+class Provider(provider.Provider):
+    _url = "https://packages.cgr.dev/chainguard/security.json"
+    _namespace = "chainguard"
+
+    def __init__(self, root: str, config: Config | None = None):
+        if not config:
+            config = Config()
+        super().__init__(root, runtime_cfg=config.runtime)
+        self.config = config
+
+        self.logger.debug(f"config: {config}")
+
+        self.schema = schema.OSSchema()
+        self.parser = Parser(
+            workspace=self.workspace,
+            url=self._url,
+            namespace=self._namespace,
+            download_timeout=self.config.request_timeout,
+            logger=self.logger,
+        )
+
+        # this provider requires the previous state from former runs
+        provider.disallow_existing_input_policy(config.runtime)
+
+    @classmethod
+    def name(cls) -> str:
+        return "chainguard"
+
+    def update(self, last_updated: datetime.datetime | None) -> tuple[list[str], int]:
+        with self.results_writer() as writer:
+            # TODO: tech debt: on subsequent runs, we should only write new vulns (this currently re-writes all)
+            for release, vuln_dict in self.parser.get():
+                for vuln_id, record in vuln_dict.items():
+                    writer.write(
+                        identifier=os.path.join(f"{self._namespace.lower()}:{release.lower()}", vuln_id),
+                        schema=self.schema,
+                        payload=record,
+                    )
+
+        return [self._url], len(writer)
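
Note how update() builds result identifiers: the lowercased namespace and release form a prefix that is path-joined with the vulnerability ID. A small sketch of the same expression with made-up values:

```python
import os

namespace = "chainguard"
release = "rolling"          # the wolfi parser yields its _release_ constant
vuln_id = "CVE-2023-12345"   # hypothetical example ID

identifier = os.path.join(f"{namespace.lower()}:{release.lower()}", vuln_id)
print(identifier)  # chainguard:rolling/CVE-2023-12345
```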
11 changes: 8 additions & 3 deletions src/vunnel/providers/wolfi/__init__.py
@@ -6,7 +6,7 @@
 
 from vunnel import provider, result, schema
 
-from .parser import Parser, namespace
+from .parser import Parser
 
 if TYPE_CHECKING:
     import datetime
@@ -24,6 +24,9 @@ class Config:
 
 
 class Provider(provider.Provider):
+    _url = "https://packages.wolfi.dev/os/security.json"
+    _namespace = "wolfi"
+
     def __init__(self, root: str, config: Config | None = None):
         if not config:
             config = Config()
@@ -35,6 +38,8 @@ def __init__(self, root: str, config: Config | None = None):
         self.schema = schema.OSSchema()
         self.parser = Parser(
             workspace=self.workspace,
+            url=self._url,
+            namespace=self._namespace,
             download_timeout=self.config.request_timeout,
             logger=self.logger,
         )
@@ -52,9 +57,9 @@ def update(self, last_updated: datetime.datetime | None) -> tuple[list[str], int
             for release, vuln_dict in self.parser.get():
                 for vuln_id, record in vuln_dict.items():
                     writer.write(
-                        identifier=os.path.join(f"{namespace.lower()}:{release.lower()}", vuln_id),
+                        identifier=os.path.join(f"{self._namespace.lower()}:{release.lower()}", vuln_id),
                         schema=self.schema,
                         payload=record,
                    )
 
-        return self.parser.urls, len(writer)
+        return [self._url], len(writer)
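
With the URL and namespace hoisted into constructor arguments (see the parser diff below), one parser implementation now serves both distros. A minimal sketch, where SimpleNamespace stands in for vunnel's real workspace object:

```python
from types import SimpleNamespace

from vunnel.providers.wolfi.parser import Parser

workspace = SimpleNamespace(input_path="/tmp/vunnel-input")  # stand-in workspace

wolfi = Parser(workspace=workspace, url="https://packages.wolfi.dev/os/security.json", namespace="wolfi")
chainguard = Parser(workspace=workspace, url="https://packages.cgr.dev/chainguard/security.json", namespace="chainguard")

# both URLs end in the same basename, so each parser stores "security.json"
print(wolfi._db_filename, chainguard._db_filename)
```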
165 changes: 77 additions & 88 deletions src/vunnel/providers/wolfi/parser.py
@@ -1,35 +1,45 @@
 from __future__ import annotations
 
 import copy
-import glob
 import json
 import logging
 import os
 import re
+from urllib.parse import urlparse
 
 import requests
 
 from vunnel import utils
 from vunnel.utils import vulnerability
 
-namespace = "wolfi"
-
 
 class Parser:
-    _url_ = "https://packages.wolfi.dev"
     _release_ = "rolling"
     _secdb_dir_ = "secdb"
-    _db_types = ["os"]
 
-    def __init__(self, workspace, download_timeout=125, url=None, logger=None):
+    def __init__(  # noqa: PLR0913
+        self,
+        workspace,
+        url: str,
+        namespace: str,
+        download_timeout: int = 125,
+        logger: logging.Logger | None = None,
+    ):
         self.download_timeout = download_timeout
         self.secdb_dir_path = os.path.join(workspace.input_path, self._secdb_dir_)
-        self.metadata_url = url.strip("/") if url else Parser._url_
-        self.urls = []
+        self.url = url
+        self.namespace = namespace
+        self._db_filename = self._extract_filename_from_url(url)
 
         if not logger:
             logger = logging.getLogger(self.__class__.__name__)
         self.logger = logger
 
+    @staticmethod
+    def _extract_filename_from_url(url):
+        return os.path.basename(urlparse(url).path)
+
     @utils.retry_with_backoff()
     def _download(self):
         """
@@ -39,55 +49,38 @@ def _download(self):
         if not os.path.exists(self.secdb_dir_path):
             os.makedirs(self.secdb_dir_path, exist_ok=True)
 
-        for t in self._db_types:
-            try:
-                rel_dir = os.path.join(self.secdb_dir_path, t)
-                os.makedirs(rel_dir, exist_ok=True)
-
-                filename = "security.json"
-                download_url = f"{self.metadata_url}/{t}/{filename}"
-
-                self.urls.append(download_url)
-
-                self.logger.info(f"downloading Wolfi secdb {download_url}")
-                r = requests.get(download_url, stream=True, timeout=self.download_timeout)
-                if r.status_code == 200:
-                    file_path = os.path.join(rel_dir, filename)
-                    with open(file_path, "wb") as fp:
-                        for chunk in r.iter_content():
-                            fp.write(chunk)
-                else:
-                    r.raise_for_status()
-            except:  # noqa
-                self.logger.exception(f"ignoring error processing secdb for {t}")
+        try:
+            self.logger.info(f"downloading {self.namespace} secdb {self.url}")
+            r = requests.get(self.url, stream=True, timeout=self.download_timeout)
+            if r.status_code == 200:
+                file_path = os.path.join(self.secdb_dir_path, self._db_filename)
+                with open(file_path, "wb") as fp:
+                    for chunk in r.iter_content():
+                        fp.write(chunk)
+            else:
+                r.raise_for_status()
+        except:  # noqa
+            self.logger.exception(f"ignoring error processing secdb for {self.url}")
 
     def _load(self):
         """
-        Loads all db json an yield it
+        Loads all db json and yields it
         :return:
         """
-        dbtype_data_dict = {}
-
-        # parse and transform the json
         try:
-            if os.path.exists(self.secdb_dir_path):
-                for s in glob.glob(f"{self.secdb_dir_path}/**/security.json", recursive=True):
-                    dbtype = s.split("/")[-2]
+            with open(f"{self.secdb_dir_path}/{self._db_filename}") as fh:
+                dbtype_data_dict = json.load(fh)
 
-                    if os.path.exists(s):
-                        self.logger.debug(f"loading secdb data from: {s}")
-                        with open(s, encoding="utf-8") as fh:
-                            dbtype_data_dict[dbtype] = json.load(fh)
-
-                yield "rolling", dbtype_data_dict
-            else:
-                raise Exception("Cannot find Wolfi sec db source ")
+                yield self._release_, dbtype_data_dict
         except Exception:
-            self.logger.exception("failed to load Wolfi sec db data")
+            self.logger.exception(f"failed to load {self.namespace} sec db data")
             raise
 
-    # noqa
-    def _normalize(self, release, dbtype_data_dict):
+    def _normalize(self, release, data):
         """
         Normalize all the sec db entries into vulnerability payload records
         :param release:
@@ -97,52 +90,48 @@ def _normalize(self, release, dbtype_data_dict):
 
         vuln_dict = {}
 
-        for dbtype, data in dbtype_data_dict.items():
-            self.logger.debug(f"normalizing {release}:{dbtype}")
-
-            if not data["packages"]:
-                continue
-
-            for el in data["packages"]:
-                pkg_el = el["pkg"]
-
-                pkg = pkg_el["name"]
-                for pkg_version in pkg_el["secfixes"]:
-                    vids = []
-                    if pkg_el["secfixes"][pkg_version]:
-                        for rawvid in pkg_el["secfixes"][pkg_version]:
-                            tmp = rawvid.split()
-                            for newvid in tmp:
-                                if newvid not in vids:
-                                    vids.append(newvid)
-
-                    for vid in vids:
-                        if not re.match("^CVE-.*", vid):
-                            # skip non-CVE records
-                            continue
-
-                        if vid not in vuln_dict:
-                            # create a new record
-                            vuln_dict[vid] = copy.deepcopy(vulnerability.vulnerability_element)
-                            vuln_record = vuln_dict[vid]
-
-                            # populate the static information about the new vuln record
-                            vuln_record["Vulnerability"]["Name"] = str(vid)
-                            vuln_record["Vulnerability"]["NamespaceName"] = namespace + ":" + str(release)
-                            vuln_record["Vulnerability"]["Link"] = "http://cve.mitre.org/cgi-bin/cvename.cgi?name=" + str(vid)
-                            vuln_record["Vulnerability"]["Severity"] = "Unknown"
-                        else:
-                            vuln_record = vuln_dict[vid]
-
-                        # SET UP fixedins
-                        fixed_el = {
-                            "Name": pkg,
-                            "Version": pkg_version,
-                            "VersionFormat": "apk",
-                            "NamespaceName": namespace + ":" + str(release),
-                        }
-
-                        vuln_record["Vulnerability"]["FixedIn"].append(fixed_el)
+        self.logger.debug("normalizing vulnerability data")
+
+        for el in data["packages"]:
+            pkg_el = el["pkg"]
+
+            pkg = pkg_el["name"]
+            for pkg_version in pkg_el["secfixes"]:
+                vids = []
+                if pkg_el["secfixes"][pkg_version]:
+                    for rawvid in pkg_el["secfixes"][pkg_version]:
+                        tmp = rawvid.split()
+                        for newvid in tmp:
+                            if newvid not in vids:
+                                vids.append(newvid)
+
+                for vid in vids:
+                    if not re.match("^CVE-.*", vid):
+                        # skip non-CVE records
+                        continue
+
+                    if vid not in vuln_dict:
+                        # create a new record
+                        vuln_dict[vid] = copy.deepcopy(vulnerability.vulnerability_element)
+                        vuln_record = vuln_dict[vid]
+
+                        # populate the static information about the new vuln record
+                        vuln_record["Vulnerability"]["Name"] = str(vid)
+                        vuln_record["Vulnerability"]["NamespaceName"] = self.namespace + ":" + str(release)
+                        vuln_record["Vulnerability"]["Link"] = "http://cve.mitre.org/cgi-bin/cvename.cgi?name=" + str(vid)
+                        vuln_record["Vulnerability"]["Severity"] = "Unknown"
+                    else:
+                        vuln_record = vuln_dict[vid]
+
+                    # SET UP fixedins
+                    fixed_el = {
+                        "Name": pkg,
+                        "Version": pkg_version,
+                        "VersionFormat": "apk",
+                        "NamespaceName": self.namespace + ":" + str(release),
+                    }
+
+                    vuln_record["Vulnerability"]["FixedIn"].append(fixed_el)
 
         return vuln_dict
 
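
For reference, _normalize consumes the Alpine-style secdb layout: a top-level "packages" list whose entries hold a "pkg" object, and whose "secfixes" map a fixed version to the vulnerability IDs it resolves. A minimal document of that shape (package and CVE values invented for illustration):

```python
data = {
    "packages": [
        {
            "pkg": {
                "name": "openssl",  # hypothetical package
                "secfixes": {
                    # fixed version -> IDs resolved by that version
                    "3.0.8-r0": ["CVE-2023-0286"],
                },
            },
        },
    ],
}
```

Fed through _normalize, each CVE becomes one record whose NamespaceName is now derived from self.namespace (e.g. wolfi:rolling or chainguard:rolling), with a FixedIn entry using the "apk" version format.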
14 changes: 12 additions & 2 deletions tests/quality/config.yaml
@@ -14,7 +14,7 @@ yardstick:
   #  - "latest" to use the latest released grype
   #  - a released version name (e.g. "v0.52.1")
   #  - a branch name (e.g. "dev-fix-foo")
-  #  - a repo reference and optional "@branch" (e.g. "my-user-fork/grype@dev-fix-foo")
+  #  - a repo reference and optional "@branch" (e.g. "github.com/my-user-fork/grype@dev-fix-foo")
   # Note:
   #  - ALWAYS leave the "import-db" annotation as-is
   #  - this version should ALWAYS match that of the other "grype" tool below
@@ -26,7 +26,7 @@
   #  - "latest" to use the latest released grype
   #  - a released version name (e.g. "v0.52.1")
   #  - a branch name (e.g. "dev-fix-foo")
-  #  - a repo reference and optional "@branch" (e.g. "my-user-fork/grype@dev-fix-foo")
+  #  - a repo reference and optional "@branch" (e.g. "github.com/my-user-fork/grype@dev-fix-foo")
   # Note:
   #  - this version should ALWAYS match that of the other "grype" tool above
   version: latest
@@ -64,6 +64,16 @@ tests:
   #   images:
   #     - docker.io/centos:6@sha256:3688aa867eb84332460e172b9250c9c198fdfd8d987605fd53f246f498c60bcf
 
+  - provider: chainguard
+    additional_providers:
+      - name: nvd
+        use_cache: true
+    additional-trigger-globs:
+      # this provider imports and uses the wolfi provider code
+      - src/vunnel/providers/wolfi/**
+    images:
+      - ghcr.io/chainguard-images/scanner-test:latest@sha256:59bddc101fba0c45d5c093575c6bc5bfee7f0e46ff127e6bb4e5acaaafb525f9
+
   - provider: debian
     # ideally we would not use cache; however, in order to test if we are properly keeping the processing
     # of legacy information that is in the debian data cache (for debian 7, 8, and 9) we must test with