-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathluts.py
110 lines (88 loc) · 3.85 KB
/
luts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from pathlib import Path
from rdflib import Graph, Namespace
from rdflib.compare import isomorphic
from rdflib.namespace import SDO
from src.dawe_nrm import api
from src.dawe_nrm.api.utils import (
fetch_and_check_uri,
fetch_collection_url_and_member_labels,
fetch_lut_with_metadata,
)
# Default directory for the LUT Turtle files: used as the output location when
# no --path argument is given, and as the source of the "local" files that
# validation compares against.
default_path = Path("vocab_files/categorical_collections/luts")
# Namespace bound to the "urnp" prefix on the graphs built during validation.
URNPROPERTY = Namespace("urn:property:")
if __name__ == "__main__":
    # Script entry point.
    #
    # Without --validate: write every categorical-value LUT to the target
    # directory (the default directory unless --path is given).
    #
    # With --validate: write the freshly fetched LUTs to a *separate* directory,
    # compare them with the LUTs stored under ``default_path`` and raise
    # ValueError if the two graphs are not isomorphic. The separate directory
    # is treated as temporary and is removed afterwards.
    import shutil
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("-p", "--path", dest="path", help="write to path")
    parser.add_argument(
        "-v",
        "--validate",
        dest="validate",
        action="store_true",
        help="validate the remote data with the local data",
    )
    args = parser.parse_args()

    path = Path.cwd() / args.path if args.path else default_path
    validate = args.validate

    # Guard BEFORE anything is written: validation deletes ``path`` when it
    # finishes, so it must never point at the local LUT directory. Resolve both
    # sides so that an explicit ``-p`` naming the default directory is caught
    # too (an absolute path never compares equal to the relative default).
    if validate and path.resolve() == default_path.resolve():
        raise ValueError(
            "In order to validate local and remote LUTs, the path must be different to the default path."
        )

    # The default directory is nested, so missing parents must be created too.
    path.mkdir(parents=True, exist_ok=True)

    api.categorical_values.write_all(path)

    if validate:
        print("Checking for changes...")
        try:
            # Load every local Turtle file into a single graph.
            local_lut_graph = Graph()
            local_lut_graph.bind("urnp", URNPROPERTY)
            for file in default_path.glob("**/*.ttl"):
                local_lut_graph.parse(file, format="turtle")

            # Build the comparison graph for the local side, one endpoint at a
            # time. NOTE(review): the fetch helpers live in
            # src.dawe_nrm.api.utils; presumably they copy the collection
            # identified by ``collection_url`` from the first graph into the
            # third and return it - confirm against their implementation.
            local_graph = Graph()
            local_graph.bind("urnp", URNPROPERTY)
            for endpoint in api.categorical_values.endpoints:
                if fetch_and_check_uri(endpoint.endpoint_url):
                    print("Fetching LUT with metadata --- ", endpoint.endpoint_url)
                    local_graph = fetch_lut_with_metadata(
                        local_lut_graph, endpoint.collection_url, local_graph
                    )
                else:
                    print("Fetching LUT with only labels --- ", endpoint.endpoint_url)
                    local_graph = fetch_collection_url_and_member_labels(
                        local_lut_graph, endpoint.collection_url, local_graph
                    )

            # Load the files that were just written from the remote API.
            remote_graph = Graph()
            for file in path.glob("**/*.ttl"):
                remote_graph.parse(file, format="turtle")

            # Delete schema:url values since they would be different as they
            # are added during transformation of the LUT API data to local
            # files. The reason they are different is because the value of
            # schema:url is tied to the file path on disk.
            local_graph.remove((None, SDO.url, None))
            remote_graph.remove((None, SDO.url, None))

            # Snapshots of both graphs are left in the working directory.
            local_graph.serialize("local_graph.ttl", format="turtle")
            remote_graph.serialize("remote_graph.ttl", format="turtle")

            # Check for changes
            is_isomorphic = isomorphic(local_graph, remote_graph)
            if not is_isomorphic:
                local_graph_diff = local_graph - remote_graph
                remote_graph_diff = remote_graph - local_graph
                local_graph_diff.serialize("local_graph_diff.ttl", format="turtle")
                remote_graph_diff.serialize("remote_graph_diff.ttl", format="turtle")
                raise ValueError(
                    "🛑 The local and remote LUT data is different.\n"
                    f"Number of triples only in local: {len(local_graph_diff)}\n"
                    f"Number of triples only in remote: {len(remote_graph_diff)}\n"
                    "Differences have been written to the following files: local_graph_diff.ttl, remote_graph_diff.ttl"
                )

            print("Local and remote data is isomorphic 🎉")
        finally:
            # Clean up the temporary output directory, including any nested
            # sub-directories (unlink() alone cannot remove directories).
            shutil.rmtree(path)
    else:
        print(f"✅ LUTs written to {path.absolute()}")