Skip to content

Commit

Permalink
Merge pull request #44 from SciCatProject/schema_selector
Browse files Browse the repository at this point in the history
Metadata schema configuration and new logic
  • Loading branch information
YooSunYoung authored Jul 16, 2024
2 parents d563b75 + e256c53 commit 4503711
Show file tree
Hide file tree
Showing 12 changed files with 548 additions and 116 deletions.
4 changes: 2 additions & 2 deletions config.20240405.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
"config_file" : "config.json",
"verbose" : false,
"file_log" : false,
"log_filepath_prefix" : ".scicat_ingestor_log",
"file_log_base_name" : ".scicat_ingestor_log",
"file_log_timestamp" : false,
"log_level" : "INFO",
"logging_level" : "INFO",
"system_log" : false,
"system_log_facility" : "mail",
"log_message_prefix" : " SFI: ",
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ requires-python = ">=3.12"
# Run 'tox -e deps' after making changes here. This will update requirement files.
# Make sure to list one dependency per line.
dependencies = [
"kafka-python",
"confluent_kafka",
"ess-streaming-data-types",
"graypy",
"h5py",
"kafka-python",
"requests",
"rich",
"graypy"
"rich"
]

dynamic = ["version"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,58 +1,78 @@
{
"id" : "715ce7ba-3f91-11ef-932f-37a5c6fd60b1"
"name" : "Coda Metadata Schema"
"instrument": "coda",
"selector": "filename:starts_with:/ess/data/coda",
"variables" : {
"pid": {
"source": "NXS:/entry/entry_identifier_uuid",
"type": "string"
"source": "NXS",
"path": "/entry/entry_identifier_uuid",
"value_type": "string"
},
"proposal_id": {
"source": "NXS:/entry/experiment_identifier",
"type": "string",
"source": "NXS",
"path": ""/entry/experiment_identifier",
"value_type": "string",
},
"pi_firstname": {
"source": "SC:proposals/<proposal_id>:pi_firstname",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field" : "pi_firstname",
"value_type": "string"
},
"pi_lastname": {
"source": "SC:proposals/<proposal_id>:pi_lastname",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field": ":pi_lastname",
"value_type": "string"
},
"pi_email": {
"source": "SC:proposals/<proposal_id>:pi_email",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field": "pi_email",
"value_type": "string"
},
"dataset_name": {
"source": "NXS:/entry/title",
"type": "string"
"source": "NXS"
"path": ""/entry/title",
"value_type": "string"
},
"instrument_name": {
"source": "NXS:/entry/instrument/name",
"type": "string",
"source": "NXS",
"path": ""/entry/instrument/name",
"value_type": "string",
},
"instrument_id": {
"source": "SC:instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D:id",
"type": "string"
"source": "SC",
"url": "instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D"
"field": "id",
"value_type": "string"
},
"start_time": {
"source": "NXS:/entry/start_time",
"type": "date"
"source": "NXS",
"path": ""/entry/start_time",
"value_type": "date"
},
"end_time": {
"source": "NXS:/entry/end_time",
"type": "date"
"source": "NXS",
"path": "/entry/end_time",
"value_type": "date"
},
"run_number": {
"source": "NXS:/entry/entry_identifier",
"type": "integer"
"source": "NXS",
"path": ""/entry/entry_identifier",
"value_type": "integer"
},
"acquisition_team_members_list": {
"source": "NXS:/entry/user_*/name",
"type": "string[]"
"source": "NXS",
"path" : "/entry/user_*/name",
"value_type": "string[]"
},
"acquisition_team_members": {
"source": "VALUES:join_with_space:<acquisition_team_members>",
"type": "string"
"source": "VALUE",
"operator" : "join_with_space"
"value" : "<acquisition_team_members>",
"value_type": "string"
}
},
"schema": {
Expand Down
85 changes: 59 additions & 26 deletions resources/config.sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,79 @@
"topics": ["KAFKA_TOPIC_1", "KAFKA_TOPIC_2"],
"group_id": "GROUP_ID",
"bootstrap_servers": ["localhost:9093"],
"individual_message_commit": false,
"sasl_mechanism": "SCRAM-SHA-256",
"sasl_username": "USERNAME",
"sasl_password": "PASSWORD",
"ssl_ca_location": "FULL_PATH_TO_CERTIFICATE_FILE",
"individual_message_commit": true,
"enable_auto_commit": true,
"auto_offset_reset": "earliest"
"auto_offset_reset": "earliest",
"message_saving_options": {
"message_to_file": true,
"message_file_extension": "message.json",
"message_output": "SOURCE_FOLDER"
}
},
"user_office": {
"host": "https://useroffice.host",
"username": "USERNAME",
"password": "PASSWORD"
"password": "PASSWORD",
"token": "JWT_TOKEN"
},
"scicat": {
"host": "https://scicat.host",
"username": "USERNAME",
"password": "PASSWORD"
},
"graylog": {
"host" : "",
"port" : "",
"facility" : "scicat.ingestor"
"password": "PASSWORD",
"token": "JWT_TOKEN"
},
"graylog": {"host": "", "port": "", "facility": "scicat.ingestor"},
"dataset": {
"instrument_id" : "",
"instrument" : "INSTRUMENT_NAME",
"default_proposal_id" : "714781",
"ownable" : {
"ownerGroup": "ess",
"accessGroups": ["ymir","swap"]
"instrument_id": "ID_OF_FALLBACK_INSTRUMENT",
"instrument": "FALLBACK_INSTRUMENT_NAME",
"default_proposal_id": "DEFAULT_PROPOSAL_ID",
"ownable": {
"ownerGroup": "DEFAULT_OWNER_GROUP",
"accessGroups": ["ACCESS_GROUP_1"]
}
},
"options": {
"config_file" : "config.json",
"verbose" : false,
"file_log" : false,
"log_filepath_prefix" : ".scicat_ingestor_log",
"file_log_timestamp" : false,
"log_level" : "INFO",
"system_log" : false,
"system_log_facility" : "mail",
"log_message_prefix" : " SFI: ",
"check_by_job_id" : true,
"config_file": "config.json",
"verbose": false,
"file_log": false,
"file_log_base_name": "scicat_ingestor_log",
"file_log_timestamp": false,
"logging_level": "INFO",
"system_log": false,
"system_log_facility": "mail",
"log_message_prefix": "SFI",
"check_by_job_id": true,
"pyscicat": null,
"graylog" : false
"graylog": false
},
"ingestion_options": {
"dry_run": false,
"schemas_directory": "schemas",
"retrieve_instrument_from": "default",
"instrument_position_in_file_path": 3,
"file_handling_options": {
"hdf_structure_in_metadata": false,
"hdf_structure_to_file": true,
"hdf_structure_file_extension": ".hdf_structure.json",
"hdf_structure_output": "SOURCE_FOLDER",
"local_output_directory": "data",
"compute_file_stats": true,
"compute_file_hash": true,
"file_hash_algorithm": "blake2b",
"save_file_hash": true,
"hash_file_extension": "b2b",
"ingestor_files_directory": "ingestor"
},
"dataset_options": {
"force_dataset_pid": true,
"dataset_pid_prefix": "20.500.12269",
"use_job_id_as_dataset_id": true,
"beautify_metadata_keys": false,
"metadata_levels_separator": " "
}
}
}
Loading

0 comments on commit 4503711

Please sign in to comment.