Skip to content

Commit

Permalink
allow for versioning providers (#131)
Browse files Browse the repository at this point in the history
  • Loading branch information
wagoodman authored Mar 24, 2023
1 parent 628a2c8 commit 7cca87f
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 32 deletions.
5 changes: 5 additions & 0 deletions DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,11 @@ Possible vulnerability schemas supported within the vunnel repo are:
- [GitHub Security Advisories](https://github.com/anchore/vunnel/tree/main/schema/vulnerability/github-security-advisory)
- [NVD Vulnerability](https://github.com/anchore/vunnel/tree/main/schema/vulnerability/nvd)

If a breaking change needs to be made to a provider (for example, the values it writes change while the schema remains
the same), set the `__version__` attribute on the provider class to a new integer value (incrementing from `1` onwards).
This indicates that the cached input/results are not compatible with the output of the current version of the
provider, in which case the next invocation of the provider will delete the previous input and results before running.


### Provider configurations

Expand Down
71 changes: 71 additions & 0 deletions schema/provider-workspace-state/schema-1.0.1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"title": "provider-workspace-state",
"description": "describes the filesystem state of a provider workspace directory",
"properties": {
"provider": {
"type": "string"
},
"urls": {
"type": "array",
"items": [
{
"type": "string"
}
]
},
"store": {
"type": "string"
},
"timestamp": {
"type": "string"
},
"listing": {
"type": "object",
"properties": {
"digest": {
"type": "string"
},
"path": {
"type": "string"
},
"algorithm": {
"type": "string"
}
},
"required": [
"digest",
"path",
"algorithm"
]
},
"version": {
"type": "integer"
},
"schema": {
"type": "object",
"properties": {
"version": {
"type": "string"
},
"url": {
"type": "string"
}
},
"required": [
"version",
"url"
]
}
},
"required": [
"provider",
"urls",
"store",
"timestamp",
"listing",
"version",
"schema"
]
}
42 changes: 33 additions & 9 deletions src/vunnel/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,21 @@ def disallow_existing_input_policy(cfg: RuntimeConfig) -> None:


class Provider(abc.ABC):
# a breaking change to the semantics or values that the provider writes out should incur a version bump here.
# this is used to determine if the provider can be run on an existing workspace or if it must be cleared first
# (regardless of what the existing_input and existing_results policies are).
__version__: int = 1

def __init__(self, root: str, runtime_cfg: RuntimeConfig = RuntimeConfig()):  # noqa: B008
    """Wire up the provider's runtime configuration, URL list, logger, and workspace handle.

    Args:
        root: passed through to ``workspace.Workspace`` as its root argument.
        runtime_cfg: runtime policy/configuration object stored on the instance.
    """
    self.runtime_cfg = runtime_cfg
    self.urls: list[str] = []
    # the logger and the workspace are both keyed by the provider's name
    self.logger = logging.getLogger(self.name())
    # create=False: the workspace object is built here without requesting creation
    self.workspace = workspace.Workspace(root, self.name(), logger=self.logger, create=False)

@classmethod
def version(cls) -> int:
    """Return the integer held in the class-level ``__version__`` attribute."""
    return cls.__version__

@classmethod
@abc.abstractmethod
def name(cls) -> str:
Expand All @@ -110,30 +119,45 @@ def update(self, last_updated: datetime.datetime | None) -> tuple[list[str], int
"""Populates the input directory from external sources, processes the data, places results into the output directory."""
raise NotImplementedError("'update()' must be implemented")

def read_state(self) -> workspace.State | None:
    """Load the state recorded under this provider's workspace path.

    Returns:
        The value produced by ``workspace.State.read``, or ``None`` when that
        call raises ``FileNotFoundError``.
    """
    try:
        recorded = workspace.State.read(root=self.workspace.path)
    except FileNotFoundError:
        recorded = None
    return recorded

def _update(self) -> None:
start = datetime.datetime.now(tz=datetime.timezone.utc)

last_updated = None
try:
current_state = workspace.State.read(root=self.workspace.path)
current_state = self.read_state()
if current_state:
last_updated = current_state.timestamp
except FileNotFoundError:
last_updated = None

urls, count = self.update(last_updated=last_updated)
if count > 0:
self.workspace.record_state(timestamp=start, urls=urls, store=self.runtime_cfg.result_store.value)
self.workspace.record_state(
version=self.version(),
timestamp=start,
urls=urls,
store=self.runtime_cfg.result_store.value,
)
else:
self.logger.debug("skipping recording of workspace state (no new results found)")

def run(self) -> None:
self.logger.debug(f"using {self.workspace.path!r} as workspace")

if self.runtime_cfg.existing_results == ResultStatePolicy.DELETE:
self.workspace.clear_results()
current_state = self.read_state()
if current_state and current_state.version != self.version():
self.logger.warning(f"provider version has changed from {current_state.version} to {self.version()}")
self.logger.warning("clearing workspace to ensure consistency of existing input and results")
self.workspace.clear()
else:
if self.runtime_cfg.existing_results == ResultStatePolicy.DELETE:
self.workspace.clear_results()

if self.runtime_cfg.existing_input == InputStatePolicy.DELETE:
self.workspace.clear_input()
if self.runtime_cfg.existing_input == InputStatePolicy.DELETE:
self.workspace.clear_input()

self.workspace.create()
try:
Expand Down
2 changes: 1 addition & 1 deletion src/vunnel/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass

PROVIDER_WORKSPACE_STATE_SCHEMA_VERSION = "1.0.0"
PROVIDER_WORKSPACE_STATE_SCHEMA_VERSION = "1.0.1"
MATCH_EXCLUSION_SCHEMA_VERSION = "1.0.0"
GITHUB_SECURITY_ADVISORY_SCHEMA_VERSION = "1.0.0"
MSRC_SCHEMA_VERSION = "1.0.0"
Expand Down
8 changes: 6 additions & 2 deletions src/vunnel/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class State:
urls: list[str]
store: str
timestamp: datetime.datetime
version: int = 1
listing: File | None = None
schema: schemaDef.Schema = field(default_factory=schemaDef.ProviderStateSchema)

Expand Down Expand Up @@ -135,6 +136,9 @@ def create(self) -> None:
def clear(self) -> None:
    """Clear the workspace input, then its results, then its metadata files."""
    # order is kept as input -> results -> metadata
    self.clear_input()
    self.clear_results()
    self._clear_metadata()

def _clear_metadata(self) -> None:
    """Remove the metadata file and the checksum listing file from the workspace path."""
    for filename in (METADATA_FILENAME, CHECKSUM_LISTING_FILENAME):
        utils.silent_remove(os.path.join(self.path, filename))

Expand Down Expand Up @@ -162,7 +166,7 @@ def clear_input(self) -> None:
shutil.rmtree(self.input_path)
os.makedirs(self.input_path, exist_ok=True)

def record_state(self, timestamp: datetime.datetime, urls: list[str], store: str) -> None:
def record_state(self, version: int, timestamp: datetime.datetime, urls: list[str], store: str) -> None:
try:
current_state = State.read(root=self.path)
except FileNotFoundError:
Expand All @@ -176,7 +180,7 @@ def record_state(self, timestamp: datetime.datetime, urls: list[str], store: str

self.logger.info("recording workspace state")

state = State(provider=self.name, urls=urls, store=store, timestamp=timestamp)
state = State(provider=self.name, version=version, urls=urls, store=store, timestamp=timestamp)
metadata_path = state.write(self.path, self.results_path)

self.logger.debug(f"wrote workspace state to {metadata_path}")
Expand Down
48 changes: 43 additions & 5 deletions tests/unit/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import json
import os
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch

import pytest
from vunnel import provider, result, schema, workspace
Expand Down Expand Up @@ -93,6 +93,47 @@ def test_clear_existing_state(dummy_provider):
assert subject.workspace.clear_results.call_count == 1


def test_clear_existing_state_from_mismatched_versions(dummy_provider):
    """With KEEP policies and the provider reporting version 2, run() must clear input, results, and metadata once each."""
    subject = dummy_provider(
        populate=True,
        runtime_cfg=provider.RuntimeConfig(
            existing_input=provider.InputStatePolicy.KEEP,
            existing_results=provider.ResultStatePolicy.KEEP,
        ),
    )

    # wrap the real methods so calls are counted while the original behavior still runs
    tracked = ("clear_input", "clear_results", "_clear_metadata")
    for attr in tracked:
        setattr(subject.workspace, attr, MagicMock(side_effect=getattr(subject.workspace, attr)))

    # force the provider to report a version of 2 (NOTE(review): presumably differs from the fixture's recorded state)
    subject.version = MagicMock(return_value=2)

    subject.run()

    for attr in tracked:
        assert getattr(subject.workspace, attr).call_count == 1


def test_keep_existing_state_from_matching_versions(dummy_provider):
    """With KEEP policies and no version override, run() must not clear input, results, or metadata."""
    subject = dummy_provider(
        populate=True,
        runtime_cfg=provider.RuntimeConfig(
            existing_input=provider.InputStatePolicy.KEEP,
            existing_results=provider.ResultStatePolicy.KEEP,
        ),
    )

    # wrap the real methods so calls are counted while the original behavior still runs
    tracked = ("clear_input", "clear_results", "_clear_metadata")
    for attr in tracked:
        setattr(subject.workspace, attr, MagicMock(side_effect=getattr(subject.workspace, attr)))

    subject.run()

    for attr in tracked:
        assert getattr(subject.workspace, attr).call_count == 0


def test_keep_existing_state(dummy_provider, dummy_file):
policy = provider.RuntimeConfig(
existing_input=provider.InputStatePolicy.KEEP,
Expand Down Expand Up @@ -276,10 +317,7 @@ def assert_dummy_workspace_state(ws):
urls=["http://localhost:8000/dummy-input-1.json"],
listing=workspace.File(digest="1e119ae45b38b28f", algorithm="xxh64", path="checksums"),
timestamp=None,
schema=schema.Schema(
version="1.0.0",
url="https://raw.githubusercontent.com/anchore/vunnel/main/schema/provider-workspace-state/schema-1.0.0.json",
),
schema=schema.ProviderStateSchema(),
)

assert current_state == expected_state
4 changes: 2 additions & 2 deletions tests/unit/test_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_result_writer_flat_file(tmpdir):
payload={"Vulnerability": {"dummy": "result-2"}},
)

ws.record_state(timestamp=datetime.datetime.now(), urls=[], store=store_strategy)
ws.record_state(timestamp=datetime.datetime.now(), urls=[], store=store_strategy, version=1)

state = ws.state()

Expand Down Expand Up @@ -50,7 +50,7 @@ def test_result_writer_sqlite(tmpdir):
payload={"Vulnerability": {"dummy": "result-2"}},
)

ws.record_state(timestamp=datetime.datetime.now(), urls=[], store=store_strategy)
ws.record_state(timestamp=datetime.datetime.now(), urls=[], store=store_strategy, version=1)

state = ws.state()

Expand Down
20 changes: 7 additions & 13 deletions tests/unit/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_clear_results(tmpdir, dummy_file):

urls = ["http://localhost:8000/dummy-input-1.json"]
store = result.StoreStrategy.FLAT_FILE
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1))
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1), version=1)

assert_directory(ws.input_path, exists=True, empty=False)
assert_directory(ws.results_path, exists=True, empty=False)
Expand All @@ -71,7 +71,7 @@ def test_record_state(tmpdir, dummy_file):

urls = ["http://localhost:8000/dummy-input-1.json"]
store = result.StoreStrategy.FLAT_FILE
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1))
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1), version=1)

current_state = workspace.State.read(root=ws.path)

Expand All @@ -85,10 +85,7 @@ def test_record_state(tmpdir, dummy_file):
urls=["http://localhost:8000/dummy-input-1.json"],
listing=workspace.File(digest="63b7adef165e430a", algorithm="xxh64", path="checksums"),
timestamp=None,
schema=schema.Schema(
version="1.0.0",
url="https://raw.githubusercontent.com/anchore/vunnel/main/schema/provider-workspace-state/schema-1.0.0.json",
),
schema=schema.ProviderStateSchema(),
)

assert current_state == expected_state
Expand All @@ -103,10 +100,10 @@ def test_record_state_urls_persisted_across_runs(tmpdir, dummy_file):

urls = ["http://localhost:8000/dummy-input-1.json"]
store = result.StoreStrategy.FLAT_FILE
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1))
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1), version=1)

# this call should not clear the URLs
ws.record_state(urls=None, store=store.value, timestamp=datetime.datetime(2021, 1, 1))
ws.record_state(urls=None, store=store.value, timestamp=datetime.datetime(2021, 1, 1), version=1)

current_state = workspace.State.read(root=ws.path)

Expand All @@ -120,10 +117,7 @@ def test_record_state_urls_persisted_across_runs(tmpdir, dummy_file):
urls=["http://localhost:8000/dummy-input-1.json"],
listing=workspace.File(digest="63b7adef165e430a", algorithm="xxh64", path="checksums"),
timestamp=None,
schema=schema.Schema(
version="1.0.0",
url="https://raw.githubusercontent.com/anchore/vunnel/main/schema/provider-workspace-state/schema-1.0.0.json",
),
schema=schema.ProviderStateSchema(),
)

assert current_state == expected_state
Expand All @@ -139,7 +133,7 @@ def test_state_schema(tmpdir, dummy_file, helpers):

urls = ["http://localhost:8000/dummy-input-1.json"]
store = result.StoreStrategy.FLAT_FILE
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1))
ws.record_state(urls=urls, store=store.value, timestamp=datetime.datetime(2021, 1, 1), version=1)

ws_helper = helpers.provider_workspace_helper(name=name, create=False)

Expand Down

0 comments on commit 7cca87f

Please sign in to comment.