Skip to content

Commit

Permalink
mf
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Jan 29, 2025
1 parent f661479 commit 5236cab
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 4 deletions.
1 change: 1 addition & 0 deletions python-client/tests/format_check/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
TSV_OUTPUT_VALID = RESOURCES / "tsv-valid"
IR_QUERY_OUTPUT = RESOURCES / "query-processing-outputs" / "query-segmentation"
JSONL_OUTPUT_VALID = RESOURCES / "jsonl-valid"
JSONL_GZ_OUTPUT_VALID = RESOURCES / "jsonl-valid-gz"
JSONL_OUTPUT_INVALID = RESOURCES / "jsonl-invalid"
11 changes: 11 additions & 0 deletions python-client/tests/format_check/test_check_jsonl_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
_OK,
EMPTY_OUTPUT,
IR_QUERY_OUTPUT,
JSONL_GZ_OUTPUT_VALID,
JSONL_OUTPUT_INVALID,
JSONL_OUTPUT_VALID,
TSV_OUTPUT_VALID,
Expand All @@ -30,6 +31,16 @@ def test_valid_jsonl_output_file(self):
actual = check_format(JSONL_OUTPUT_VALID / "predictions.jsonl", "*.jsonl")
self.assertEqual(expected, actual)

# Directory case: passes the JSONL_GZ_OUTPUT_VALID fixture directory itself to
# check_format with the "*.jsonl" format and expects the success result.
# NOTE(review): the fixture is presumably a gzip-compressed predictions file
# (it is shown only as a binary file in this commit) -- confirm its contents.
def test_valid_jsonl_gz_output_directory(self):
# Expected result: the _OK status followed by the success message.
expected = [_OK, "The jsonl file has the correct format."]
actual = check_format(JSONL_GZ_OUTPUT_VALID, "*.jsonl")
self.assertEqual(expected, actual)

# File case: passes the path of predictions.jsonl.gz inside the
# JSONL_GZ_OUTPUT_VALID fixture directory directly to check_format (instead of
# the directory) and expects the same success result for the "*.jsonl" format.
def test_valid_jsonl_gz_output_file(self):
# Expected result: the _OK status followed by the success message.
expected = [_OK, "The jsonl file has the correct format."]
actual = check_format(JSONL_GZ_OUTPUT_VALID / "predictions.jsonl.gz", "*.jsonl")
self.assertEqual(expected, actual)

def test_invalid_jsonl_output_directory(self):
actual = check_format(JSONL_OUTPUT_INVALID, "*.jsonl")
self.assertEqual(actual[0], _ERROR)
Expand Down
Binary file not shown.
6 changes: 4 additions & 2 deletions python-client/tira/check_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,12 @@ def check_format(self, run_output: Path):
return [_fmt.ERROR, str(e)]

def all_lines(self, run_output):
if str(run_output).endswith(".jsonl") and run_output.is_file():
if (str(run_output).endswith(".jsonl") or str(run_output).endswith(".jsonl.gz")) and run_output.is_file():
matches = [run_output]
else:
matches = [run_output / i for i in os.listdir(run_output) if i.endswith(".jsonl")]
matches = [
run_output / i for i in os.listdir(run_output) if i.endswith(".jsonl") or i.endswith(".jsonl.gz")
]

if len(matches) != 1:
raise ValueError(
Expand Down
1 change: 1 addition & 0 deletions python-client/tira/pandas_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def from_retriever_submission(
pd.DataFrame: The run file parsed to a pandas DataFrame.
"""
import pandas as pd

from tira.ir_datasets_util import translate_irds_id_to_tirex

task, team, software = approach.split("/")
Expand Down
5 changes: 3 additions & 2 deletions python-client/tira/third_party_integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def persist_and_normalize_run(
if upload_to_tira and not in_tira_sandbox():
tira = _tira_client(tira_client)
tmp = tira.get_dataset(upload_to_tira)
if not tmp or 'dataset_id' not in tmp:
if not tmp or "dataset_id" not in tmp:
upload_to_tira = None
else:
upload_to_tira = None
Expand All @@ -165,6 +165,7 @@ def persist_and_normalize_run(
output_file = Path(output_file).parent
upload_run_anonymous(output_file, tira, upload_to_tira)


def _tira_client(default_tira_client=None):
if in_tira_sandbox():
return None
Expand All @@ -177,7 +178,7 @@ def _tira_client(default_tira_client=None):
return RestClient()


def upload_run_anonymous(directory: Path=None, tira_client=None, dataset_id=None):
def upload_run_anonymous(directory: Path = None, tira_client=None, dataset_id=None):
tira = _tira_client(tira_client)
if not tira:
return
Expand Down

0 comments on commit 5236cab

Please sign in to comment.