Skip to content

Commit

Permalink
Merge pull request #109 from broadinstitute/jlc_avoid_clobber
Browse files Browse the repository at this point in the history
Prevent file overwrite by manage-study (SCP-2698)
  • Loading branch information
jlchang authored Oct 7, 2020
2 parents 3506e57 + 59b1b69 commit 9446a55
Show file tree
Hide file tree
Showing 5 changed files with 278 additions and 77 deletions.
85 changes: 46 additions & 39 deletions scripts/Commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import subprocess as sp


class Commandline:
"""
This class wraps calls to the command line in a simple interface.
Expand All @@ -24,37 +25,33 @@ def __init__(self, name=None, logr_cur=None):

self.logr_cur = logr_cur if logr_cur else logging.getLogger(name)


# Tested
def func_CMD(self,
command,
use_bash=False,
test=False,
stdout=False):
def func_CMD(self, command, use_bash=False, test=False, stdout=False, mute=False):
"""
Runs the given command.
Runs the given command.
* command : Command to run on the commandline
: String
* use_bash : Boolean
: If true, sends the command through using
BASH not SH (Bourne) or default shell (windows)
: Although the False is _I believe_ is not specific
to an OS, True is.
* test : Boolean
If true, the commands will not run but will be logged.
* test : Boolean
If true, the commands will not run but will be logged.
* stdout : Boolean
: If true, stdout will be given instead of True.
: If true, stdout will be given instead of True.
On a fail False (boolean) will still be given.
* Return : Boolean
True indicates success
"""
* mute : Boolean
: If true, mute stdout even on fail False.
* Return : Boolean
True indicates success
"""

return_code = None

# Update command for bash shell
if use_bash:
command = "".join([os.sep, "bin", os.sep,
"bash -c \'", command, "\'"])
command = "".join([os.sep, "bin", os.sep, "bash -c '", command, "'"])

# Do not do anything when testing
if test:
Expand All @@ -63,10 +60,9 @@ def func_CMD(self,
try:
# Perform command and wait for completion
if stdout:
subp_cur = sp.Popen(command,
shell=True,
cwd=os.getcwd(),
stdout=sp.PIPE)
subp_cur = sp.Popen(
command, shell=True, cwd=os.getcwd(), stdout=sp.PIPE
)
else:
subp_cur = sp.Popen(command, shell=True, cwd=os.getcwd())
pid = str(subp_cur.pid)
Expand All @@ -80,28 +76,39 @@ def func_CMD(self,
if stdout:
return out
return True
elif mute:
return False
else:
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Received return code = ",
str(return_code)]))
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Command = ",
command]))
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Error out= ",
str(out)]))
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Error= ",
str(err)]))
self.logr_cur.error(
"".join(
[
self.__class__.__name__,
"::Error::Received return code = ",
str(return_code),
]
)
)
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error::Command = ", command])
)
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error::Error out= ", str(out)])
)
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error::Error= ", str(err)])
)
return False

# Inform on errors:w
except(OSError, TypeError) as e:
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Fatal error."]))
self.logr_cur.error("".join([self.__class__.__name__,
"::Error::Command = ", command]))
self.logr_cur.error("".join([self.__class__.__name__,
"::Error:: Error = ", str(e)]))
except (OSError, TypeError) as e:
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error::Fatal error."])
)
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error::Command = ", command])
)
self.logr_cur.error(
"".join([self.__class__.__name__, "::Error:: Error = ", str(e)])
)
return False
return False
return False
9 changes: 9 additions & 0 deletions scripts/manage_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@
# Avoid sending a user-agent string while obtaining the number of studies in SCP
python manage_study.py --no-user-agent --token=$ACCESS_TOKEN list-studies --summary
EXIT CODES
# TODO: replace with python error handling and logging (SCP-2790)
79 incompatible scp-ingest-pipeline package version detected
80 exit-file-already-exists-in-study-bucket
81 exit-file-not-found-in-study-bucket
82 exit-failed-to-gsutil-delete-file
83 exit-uploaded-file-deleted
84 exit-no-file-cleanup-needed
85 exit-file-not-found-in-remote-bucket
"""

import argparse
Expand Down
99 changes: 88 additions & 11 deletions scripts/scp_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,10 @@ def check_api_return(self, ret):
print("Local ingest pipeline package version incompatibile with server")
print(ret.json()["error"])
print("")
exit(1)
# custom exit code to indicate
# incompatible scp-ingest-pipeline package version detected
# TODO: replace with python error handling and logging (SCP-2790)
exit(79)
api_return[c_CODE_RET_KEY] = ret.status_code
api_return[c_RESPONSE] = ret
return api_return
Expand Down Expand Up @@ -405,7 +408,6 @@ def get_studies(self, dry_run=False):
else:
if resp[c_SUCCESS_RET_KEY]:
studies_json = resp[c_RESPONSE].json()
print(f"from scp_api {studies_json}")
self.study_objects = studies_json
self.studies = [str(element["name"]) for element in studies_json]
self.name_to_id = [
Expand Down Expand Up @@ -816,18 +818,60 @@ def study_name_to_id(self, name, dry_run=False):
else:
return self.name_to_id.get(name, None)

@staticmethod
def exists_in_bucket(bucket_file_path, mute=False):
"""gsutil gstat to see if file is in study bucket"""
command = "gsutil stat " + bucket_file_path
return cmdline.func_CMD(command=command, mute=True)

@staticmethod
def upload_via_gsutil(bucket_id, file_path):
"""Copy file to Google Cloud Storage bucket, return stats and filename"""

gs_url = "gs://" + bucket_id

# Upload to bucket via gsutil
command = "gsutil cp " + file_path + " " + gs_url
cmdline.func_CMD(command=command)
filename = file_path.split("/")[-1]

# Get GCS details for the file just uploaded
# if filepath uses destination google bucket, no need to upload
if file_path[:5] == "gs://":
source_bucket = file_path[5:].split("/")[0]
else:
source_bucket = None

# check if file in bucket via gsutil
full_gs_url = gs_url + "/" + filename
if SCPAPIManager.exists_in_bucket(full_gs_url, mute=True):
if source_bucket == bucket_id:
print(f"Using existing file, {filename}, in study bucket.")
file_from_study_bucket = True
else:
print(
f"ERROR: {filename} already exists in study bucket, please delete existing file, then retry."
)
# custom exit code to indicate exit-file-already-exists-in-study-bucket
# TODO: replace with python error handling and logging (SCP-2790)
exit(80)
else:
if source_bucket == bucket_id:
print(f"\nERROR: {filename} not found in study bucket.")
# custom exit code to indicate exit-file-not-found-in-study-bucket
# TODO: replace with python error handling and logging (SCP-2790)
exit(81)
else:
file_from_study_bucket = False
print()

# Upload to bucket via gsutil
if not (file_from_study_bucket and source_bucket):
command = "gsutil cp " + file_path + " " + gs_url
if cmdline.func_CMD(command=command, stdout=False):
print("\n")
else:
print(f"\nERROR: failed to find upload file {file_path}.")
# custom exit code to indicate exit-file-not-found-in-remote-bucket
# TODO: replace with python error handling and logging (SCP-2790)
exit(85)

# Get GCS details for the file to be used
command = "gsutil stat " + gs_url + "/" + filename
stdout = cmdline.func_CMD(command=command, stdout=True)

Expand All @@ -838,7 +882,23 @@ def upload_via_gsutil(bucket_id, file_path):
[key, value] = line.split(": ")
gsutil_stat[key] = value.strip()

return [gsutil_stat, filename]
return [gsutil_stat, filename, file_from_study_bucket]

@staticmethod
def delete_via_gsutil(bucket_id, file_path):
"""Remove file from Google Cloud Storage bucket"""

gs_url = "gs://" + bucket_id
filename = file_path.split("/")[-1]

# Delete file from bucket via gsutil
command = "gsutil rm " + gs_url + "/" + filename

if not cmdline.func_CMD(command=command, stdout=False):
print(f"ERROR: failed to delete {filename}.")
# custom exit code to indicate exit-failed-to-gsutil-delete-file
# TODO: replace with python error handling and logging (SCP-2790)
exit(82)

def upload_study_file(
self,
Expand Down Expand Up @@ -873,7 +933,9 @@ def upload_study_file(
break
bucket_id = study["bucket_id"]

[gsutil_stat, filename] = self.upload_via_gsutil(bucket_id, file)
[gsutil_stat, filename, file_from_study_bucket] = self.upload_via_gsutil(
bucket_id, file
)

file_fields = {
"study_file": {
Expand All @@ -896,7 +958,22 @@ def upload_study_file(
values=file_fields,
dry_run=dry_run,
)
# print(f'/study_files response: {ret["response"].text}')
# handle file upload refused by server (handles metadata file cases)
if ret[c_CODE_RET_KEY] == 422:
print("\nUpload request failed with the following error(s)")
for key in ret[c_RESPONSE].json()["errors"].keys():
print(f"{key}:{ret[c_RESPONSE].json()['errors'][key]}")
if not file_from_study_bucket:
print("\nCleaning up after failed request:")
self.delete_via_gsutil(bucket_id, file)
# custom exit code to indicate exit-uploaded-file-deleted
# TODO: replace with python error handling and logging (SCP-2790)
exit(83)
else:
print("\nExisting file will persist in study bucket.")
# custom exit code to indicate exit-no-file-cleanup-needed
# TODO: replace with python error handling and logging (SCP-2790)
exit(84)

if parse:
study_files_response = ret["response"].json()
Expand Down Expand Up @@ -957,7 +1034,7 @@ def upload_cluster(
file,
"Cluster",
study_name,
cluster_name=cluster_name,
name=cluster_name,
description=description,
x=x,
y=x,
Expand Down
Loading

0 comments on commit 9446a55

Please sign in to comment.