From a1478206434c98d2aa468a8da96958768192e124 Mon Sep 17 00:00:00 2001 From: James Wood Date: Fri, 26 Apr 2024 11:15:52 -0700 Subject: [PATCH] Feature/setup tests (#711) * Add C1247485682-LARC_CLOUD * remove ids * add temp limit * update cleanup * remove temp limit --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: jamesfwood Co-authored-by: James Wood --- .github/workflows/diff.yml | 25 +- .gitignore | 2 +- README.md | 8 +- .../__init__.py | 0 .../cmr_association_diff.py | 0 pyproject.toml | 8 +- tests/cmr/concise/ops/C1940473819-POCLOUD | 1 + tests/cmr/concise/uat/C1234724470-POCLOUD | 1 + tests/collection_names.py | 2 +- tests/conftest.py | 2 +- tests/remove_prs.py | 2 +- tests/verify_collection.py | 264 +++++++++--------- 12 files changed, 163 insertions(+), 152 deletions(-) rename {l2ss_py_autotest => concise_autotest}/__init__.py (100%) rename {l2ss_py_autotest => concise_autotest}/cmr_association_diff.py (100%) create mode 100644 tests/cmr/concise/ops/C1940473819-POCLOUD create mode 100644 tests/cmr/concise/uat/C1234724470-POCLOUD diff --git a/.github/workflows/diff.yml b/.github/workflows/diff.yml index 2eee0048..e3950dd5 100644 --- a/.github/workflows/diff.yml +++ b/.github/workflows/diff.yml @@ -1,5 +1,5 @@ # Run every third day starting from the 2nd of the month 1 am pacific -# check for new collection associations to the l2ss-py UMM-S record +# check for new collection associations to the concise UMM-S record # in UAT and OPS. If a new association is found, a PR is opened to add the new collection concept id to the # cmr/concise/*_associations.txt file. @@ -33,21 +33,17 @@ jobs: run: | poetry install - ls $GITHUB_WORKSPACE/tests/cmr/concise/uat/ > $GITHUB_WORKSPACE/tests/cmr/concise/uat_associations.txt - ls $GITHUB_WORKSPACE/tests/cmr/concise/ops/ > $GITHUB_WORKSPACE/tests/cmr/concise/ops_associations.txt + ls $GITHUB_WORKSPACE/tests/cmr/concise/uat/ > uat_associations.txt + ls $GITHUB_WORKSPACE/tests/cmr/concise/ops/ > ops_associations.txt - poetry run cmr_association_diff -e uat -t service -p POCLOUD -n 'PODAAC Concise' -a $GITHUB_WORKSPACE/tests/cmr/concise/uat_associations.txt --token $UAT_TOKEN_TEMP > $GITHUB_WORKSPACE/tests/cmr/concise/new_uat_associations.txt - poetry run cmr_association_diff -e ops -t service -p POCLOUD -n 'PODAAC Concise' -a $GITHUB_WORKSPACE/tests/cmr/concise/ops_associations.txt --token $OPS_TOKEN_TEMP > $GITHUB_WORKSPACE/tests/cmr/concise/new_ops_associations.txt + poetry run cmr_association_diff -e uat -t service -p POCLOUD -n 'PODAAC Concise' -a uat_associations.txt --token $UAT_TOKEN_TEMP > new_uat_associations.txt + poetry run cmr_association_diff -e ops -t service -p POCLOUD -n 'PODAAC Concise' -a ops_associations.txt --token $OPS_TOKEN_TEMP > new_ops_associations.txt - echo "new_uat_associations=$(poetry run python tests/collection_names.py --env uat --token $UAT_TOKEN_TEMP --file $GITHUB_WORKSPACE/tests/cmr/concise/new_uat_associations.txt)" >> $GITHUB_OUTPUT - echo "new_ops_associations=$(poetry run python tests/collection_names.py --env ops --token $OPS_TOKEN_TEMP --file $GITHUB_WORKSPACE/tests/cmr/concise/new_ops_associations.txt)" >> $GITHUB_OUTPUT + echo "new_uat_associations=$(poetry run python tests/collection_names.py --env uat --token $UAT_TOKEN_TEMP --file new_uat_associations.txt)" >> $GITHUB_OUTPUT + echo "new_ops_associations=$(poetry run python tests/collection_names.py --env ops --token $OPS_TOKEN_TEMP --file new_ops_associations.txt)" >> $GITHUB_OUTPUT - rm 
$GITHUB_WORKSPACE/tests/cmr/concise/uat_associations.txt - rm $GITHUB_WORKSPACE/tests/cmr/concise/ops_associations.txt - rm $GITHUB_WORKSPACE/tests/cmr/concise/new_uat_associations.txt - rm $GITHUB_WORKSPACE/tests/cmr/concise/new_ops_associations.txt + rm *_associations.txt open_pr_uat: - if: false needs: find_new strategy: fail-fast: false @@ -80,7 +76,7 @@ jobs: delete-branch: true title: UAT ${{ matrix.data.concept_id }} (${{ matrix.data.short_name }}) body: | - New association between l2ss-py and ${{ matrix.data.concept_id }} found in UAT. + New association between concise and ${{ matrix.data.concept_id }} found in UAT. Beginning verification of collection. labels: | unverified @@ -92,7 +88,6 @@ jobs: open_pr_ops: - if: false needs: find_new strategy: fail-fast: false @@ -125,7 +120,7 @@ jobs: delete-branch: true title: OPS ${{ matrix.data.concept_id }} (${{ matrix.data.short_name }}) body: | - New association between l2ss-py and ${{ matrix.data.concept_id }} found in OPS. + New association between concise and ${{ matrix.data.concept_id }} found in OPS. Beginning verification of collection. labels: | unverified diff --git a/.gitignore b/.gitignore index 269cc332..7472af5b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ /.idea/inspectionProfiles/Project_Default.xml /.idea/.gitignore /.idea/encodings.xml -/.idea/l2ss-py-autotest.iml +/.idea/concise-autotest.iml /.idea/misc.xml /.idea/modules.xml /.idea/vcs.xml diff --git a/README.md b/README.md index ed53f28e..71e0d905 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# l2ss-py-autotest +# concise-autotest -This repository contains functional/integration tests for l2ss-py. It also includes github +This repository contains functional/integration tests for concise. It also includes github action workflows for automatically running these tests whenever a new collection gets -associated to the l2ss-py UMM-S record. +associated to the concise UMM-S record. ## How it works @@ -17,7 +17,7 @@ associated to the l2ss-py UMM-S record. ## What to do if tests fail If a test fails, meaning an assertion did not succeed, or an unknown error occurs action must be taken. The cause of the failure should be determined and fixed. -A failing test generally indicates an issue with either metadata or l2ss-py itself and may require additional steps. +A failing test generally indicates an issue with either metadata or concise itself and may require additional steps. In some cases, the test may need to be updated to account for a unique edge case. 
## What to do if tests are skipped diff --git a/l2ss_py_autotest/__init__.py b/concise_autotest/__init__.py similarity index 100% rename from l2ss_py_autotest/__init__.py rename to concise_autotest/__init__.py diff --git a/l2ss_py_autotest/cmr_association_diff.py b/concise_autotest/cmr_association_diff.py similarity index 100% rename from l2ss_py_autotest/cmr_association_diff.py rename to concise_autotest/cmr_association_diff.py diff --git a/pyproject.toml b/pyproject.toml index ccf7027a..158f6a69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [tool.poetry] -name = "l2ss-py-autotest" +name = "concise-autotest" version = "0.1.0" -description = "Automated tests for new associations to l2ss-py service" +description = "Automated tests for new associations to concise service" authors = ["PO.DAAC "] license = "Apache 2.0" readme = "README.md" -packages = [{include = "l2ss_py_autotest"}] +packages = [{include = "concise_autotest"}] [tool.poetry.dependencies] python = "^3.9" @@ -34,4 +34,4 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -cmr_association_diff = "l2ss_py_autotest.cmr_association_diff:run" +cmr_association_diff = "concise_autotest.cmr_association_diff:run" diff --git a/tests/cmr/concise/ops/C1940473819-POCLOUD b/tests/cmr/concise/ops/C1940473819-POCLOUD new file mode 100644 index 00000000..18fa0802 --- /dev/null +++ b/tests/cmr/concise/ops/C1940473819-POCLOUD @@ -0,0 +1 @@ +C1940473819-POCLOUD diff --git a/tests/cmr/concise/uat/C1234724470-POCLOUD b/tests/cmr/concise/uat/C1234724470-POCLOUD new file mode 100644 index 00000000..9391ada0 --- /dev/null +++ b/tests/cmr/concise/uat/C1234724470-POCLOUD @@ -0,0 +1 @@ +C1234724470-POCLOUD diff --git a/tests/collection_names.py b/tests/collection_names.py index 6edd7d47..6070d999 100644 --- a/tests/collection_names.py +++ b/tests/collection_names.py @@ -7,7 +7,7 @@ def main(): parser = argparse.ArgumentParser(description="Get Collection Names from CMR") parser.add_argument("--token", help="launchpad token") - parser.add_argument("--file", help="file with list of l2ss associations") + parser.add_argument("--file", help="file with list of concise associations") parser.add_argument("--env", help="CMR environment") args = parser.parse_args() diff --git a/tests/conftest.py b/tests/conftest.py index a39e5560..5755a4e9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,7 +50,7 @@ def pytest_addoption(parser): def pytest_generate_tests(metafunc): if metafunc.config.option.regression: - cmr_dirpath = pathlib.Path('cmr') + cmr_dirpath = pathlib.Path('cmr/concise') association_dir = 'uat' if metafunc.config.option.env == 'uat' else 'ops' associations = os.listdir(cmr_dirpath.joinpath(association_dir)) diff --git a/tests/remove_prs.py b/tests/remove_prs.py index f87cdb3b..ac33f560 100644 --- a/tests/remove_prs.py +++ b/tests/remove_prs.py @@ -26,7 +26,7 @@ # Replace these variables with your own values repo_owner = 'podaac' - repo_name = 'l2ss-py-autotest' + repo_name = 'concise-autotest' github_token = os.getenv("GITHUB_TOKEN") # Initialize a Github instance diff --git a/tests/verify_collection.py b/tests/verify_collection.py index a4feecd6..18a28007 100644 --- a/tests/verify_collection.py +++ b/tests/verify_collection.py @@ -10,6 +10,7 @@ import netCDF4 import numpy as np import podaac.subsetter.subset +import unittest import pytest import requests import xarray @@ -336,142 +337,155 @@ def get_lat_lon_var_names(dataset: xarray.Dataset, file_to_subset: str, collecti 
pytest.fail(f"Unable to find latitude and longitude variables.") -@pytest.mark.timeout(600) -def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables, - harmony_env, tmp_path: pathlib.Path, bearer_token): - test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}" - - logging.info("Using granule %s for test", granule_json['meta']['concept-id']) +def verify_dims(merged_group, origin_group, both_merged): + for dim in origin_group.dimensions: + if both_merged: + unittest.TestCase().assertEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size) + else: + unittest.TestCase().assertGreaterEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size) - # Compute a box that is smaller than the granule extent bounding box - north, south, east, west = get_bounding_box(granule_json) - east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95) - # Build harmony request - harmony_client = harmony.Client(env=harmony_env, token=bearer_token) - request_bbox = harmony.BBox(w=west, s=south, e=east, n=north) - request_collection = harmony.Collection(id=collection_concept_id) - harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox, - granule_id=[granule_json['meta']['concept-id']]) - logging.info("Sending harmony request %s", harmony_client.request_as_curl(harmony_request)) - - # Submit harmony request and download result - job_id = harmony_client.submit(harmony_request) - logging.info("Submitted harmony job %s", job_id) - harmony_client.wait_for_processing(job_id, show_progress=True) - subsetted_filepath = None - for filename in [file_future.result() - for file_future - in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]: - logging.info(f'Downloaded: %s', filename) - subsetted_filepath = pathlib.Path(filename) - - # Verify spatial subset worked - subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False) - group = None - # Try to read group in file - lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables) - - with netCDF4.Dataset(subsetted_filepath) as f: - group_list = [] - def group_walk(groups, nc_d, current_group): - global subsetted_ds_new - subsetted_ds_new = None - # check if the top group has lat or lon variable - if lat_var_name in list(nc_d.variables.keys()): - subsetted_ds_new = subsetted_ds - else: - # if not then we'll need to keep track of the group layers - group_list.append(current_group) - - # loop through the groups in the current layer - for g in groups: - # end the loop if we've already found latitude - if subsetted_ds_new: - break - # check if the groups have latitude, define the dataset and end the loop if found - if lat_var_name in list(nc_d.groups[g].variables.keys()): - group_list.append(g) - g = '/'.join(group_list) - subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=g, decode_times=False) - break - # recall the function on a group that has groups in it and didn't find latitude - # this is going 'deeper' into the groups - if len(list(nc_d.groups[g].groups.keys())) > 0: - group_walk(nc_d.groups[g].groups, nc_d.groups[g], g) - else: - continue - - group_walk(f.groups, f, '') - - assert lat_var_name and lon_var_name - - var_ds = None - msk = None - - if science_vars := get_science_vars(collection_variables): - for idx, value in enumerate(science_vars): - science_var_name = science_vars[idx]['umm']['Name'] - try: - var_ds = 
subsetted_ds_new[science_var_name] - msk = np.logical_not(np.isnan(var_ds.data.squeeze())) - break - except Exception: - try: - # if the variable couldn't be found because the name includes a group, e.g., - # `geolocation/relative_azimuth_angle`, - # then try to access the variable after removing the group name. - var_ds = subsetted_ds_new[science_var_name.rsplit("/", 1)[-1]] - msk = np.logical_not(np.isnan(var_ds.data.squeeze())) - break - except Exception: - var_ds = None - msk = None - - if var_ds is None and msk is None: - pytest.fail(f"Unable to find variable from umm-v to use as science variable.") +def verify_attrs(merged_obj, origin_obj, both_merged): + ignore_attributes = [ + 'request-bounding-box', 'request-bounding-box-description', 'PODAAC-dataset-shortname', + 'PODAAC-persistent-ID', 'time_coverage_end', 'time_coverage_start' + ] - else: - # Can't find a science var in UMM-V, just pick one + merged_attrs = merged_obj.ncattrs() + origin_attrs = origin_obj.ncattrs() - science_var_name = next(iter([v for v in subsetted_ds_new.variables if - str(v) not in lat_var_name and str(v) not in lon_var_name and 'time' not in str(v)])) + for attr in origin_attrs: + if attr in ignore_attributes: + # Skip attributes which are present in the Java implementation, + # but not (currently) present in the Python implementation + continue - var_ds = subsetted_ds_new[science_var_name] + if not both_merged and attr not in merged_attrs: + # Skip attributes which are not present in both merged and origin. + # This is normal operation as some attributes may be omited b/c + # they're inconsistent between granules + continue - try: - msk = np.logical_not(np.isnan(var_ds.data.squeeze())) - llat = subsetted_ds_new[lat_var_name].where(msk) - llon = subsetted_ds_new[lon_var_name].where(msk) - except ValueError: - - llat = subsetted_ds_new[lat_var_name] - llon = subsetted_ds_new[lon_var_name] + merged_attr = merged_obj.getncattr(attr) + if both_merged and isinstance(merged_attr, int): + # Skip integer values - the Java implementation seems to omit + # these values due to its internal handling of all values as + # Strings + continue - lat_max = llat.max() - lat_min = llat.min() + origin_attr = origin_obj.getncattr(attr) + if isinstance(origin_attr, np.ndarray): + unittest.TestCase().assertTrue(np.array_equal(merged_attr, origin_attr)) + else: + if attr != "history_json": + unittest.TestCase().assertEqual(merged_attr, origin_attr) - lon_min = llon.min() - lon_max = llon.max() - lon_min = (lon_min + 180) % 360 - 180 - lon_max = (lon_max + 180) % 360 - 180 +def verify_variables(merged_group, origin_group, subset_index, both_merged): + for var in origin_group.variables: + merged_var = merged_group.variables[var] + origin_var = origin_group.variables[var] - lat_var_fill_value = subsetted_ds_new[lat_var_name].encoding.get('_FillValue') - lon_var_fill_value = subsetted_ds_new[lon_var_name].encoding.get('_FillValue') + verify_attrs(merged_var, origin_var, both_merged) - if lat_var_fill_value: - if (lat_max <= north or np.isclose(lat_max, north)) and (lat_min >= south or np.isclose(lat_min, south)): - logging.info("Successful Latitude subsetting") - elif np.isnan(lat_max) and np.isnan(lat_min): - logging.info("Partial Lat Success - no Data") + if both_merged: + # both groups require subset indexes + merged_data = merged_var[subset_index[0]] + origin_data = origin_var[subset_index[1]] else: - assert False + # merged group requires a subset index + merged_data = np.resize(merged_var[subset_index], origin_var.shape) + 
origin_data = origin_var + + equal_nan = True + if merged_data.dtype.kind == 'S': + equal_nan = False - if lon_var_fill_value: - if (lon_max <= east or np.isclose(lon_max, east)) and (lon_min >= west or np.isclose(lon_min, west)): - logging.info("Successful Longitude subsetting") - elif np.isnan(lon_max) and np.isnan(lon_min): - logging.info("Partial Lon Success - no Data") + # verify variable data + if isinstance(origin_data, str): + unittest.TestCase().assertEqual(merged_data, origin_data) else: - assert False + unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=equal_nan)) + + +def verify_groups(merged_group, origin_group, subset_index, file=None, both_merged=False): + if file: + print("verifying groups ....." + file) + + verify_dims(merged_group, origin_group, both_merged) + verify_attrs(merged_group, origin_group, both_merged) + verify_variables(merged_group, origin_group, subset_index, both_merged) + + for child_group in origin_group.groups: + merged_subgroup = merged_group[child_group] + origin_subgroup = origin_group[child_group] + verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged=both_merged) + + +def download_file(url, local_path, headers): + response = requests.get(url, stream=True, headers=headers) + if response.status_code == 200: + with open(local_path, 'wb') as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + print("Original File downloaded successfully. " + local_path) + else: + print(f"Failed to download the file. Status code: {response.status_code}") + + +@pytest.mark.timeout(600) +def test_concatenate(collection_concept_id, harmony_env, bearer_token): + + max_results = 2 + + harmony_client = harmony.Client(env=harmony_env, token=bearer_token) + collection = harmony.Collection(id=collection_concept_id) + + request = harmony.Request( + collection=collection, + concatenate=True, + max_results=max_results, + skip_preview=True, + format="application/x-netcdf4", + ) + + request.is_valid() + + print(harmony_client.request_as_curl(request)) + + job1_id = harmony_client.submit(request) + + print(f'\n{job1_id}') + + print(harmony_client.status(job1_id)) + + print('\nWaiting for the job to finish') + + results = harmony_client.result_json(job1_id) + + print('\nDownloading results:') + + futures = harmony_client.download_all(job1_id) + file_names = [f.result() for f in futures] + print('\nDone downloading.') + + filename = file_names[0] + + # Handle time dimension and variables dropping + merge_dataset = netCDF4.Dataset(filename, 'r') + + headers = { + "Authorization": f"Bearer {bearer_token}" + } + + original_files = merge_dataset.variables['subset_files'] + history_json = json.loads(merge_dataset.history_json) + assert len(original_files) == max_results + + for url in history_json[0].get("derived_from"): + local_file_name = os.path.basename(url) + download_file(url, local_file_name, headers) + + for i, file in enumerate(original_files): + origin_dataset = netCDF4.Dataset(file) + verify_groups(merge_dataset, origin_dataset, i, file=file)
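
A minimal sketch of reproducing the new test_concatenate check locally, under stated assumptions: the --env and --regression option names are inferred from the pytest_addoption/pytest_generate_tests code in tests/conftest.py, the bearer_token fixture is assumed to obtain Earthdata Login credentials from the local environment, and because conftest.py resolves pathlib.Path('cmr/concise') against the working directory, the invocation is assumed to run from the tests/ directory. This is illustrative only, not part of the patch.

# run_concatenate_locally.py -- illustrative sketch, not part of this patch
import pytest

# Collects test_concatenate once per collection concept id listed under
# cmr/concise/uat/ (switch "uat" to "ops" for the production environment).
exit_code = pytest.main([
    "verify_collection.py::test_concatenate",
    "--env", "uat",      # assumed option name from conftest.py (option.env)
    "--regression",      # assumed flag name from conftest.py (option.regression)
])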