Skip to content

Commit

Permalink
Merge pull request #33 from spestana/goespy-integration
Browse files Browse the repository at this point in the history
Move gtsa and goespy dependencies into package
  • Loading branch information
spestana authored Sep 23, 2024
2 parents d487013 + 28292e6 commit d37a6d6
Show file tree
Hide file tree
Showing 7 changed files with 970 additions and 7 deletions.
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ dependencies = [
"flake8",
"scikit-learn",
"geogif",
"goespy @ git+https://github.com/spestana/goes-py",
"gtsa @ git+https://github.com/friedrichknuth/gtsa",
]
requires-python = ">=3.8"
authors = [
Expand Down
209 changes: 209 additions & 0 deletions src/goes_ortho/Downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
"""This module contains the functions needed to download ABI sensor products and GLM-L2 total lightning data
from the GOES satellites."""


def ABI_Downloader(home, bucket, year, month, day, hour, product, channel):  # noqa: C901
    """Download ABI products from a GOES bucket on S3 into a local directory tree.

    Objects are listed under the prefix ``<product>/<year>/<julianDay>/<hour>/``
    and each one is downloaded unless a local file with the same name and size
    already exists.

    Parameters
    ----------
    home : str
        Directory under which the ABI products are stored.
    bucket : str
        Name of the S3 bucket holding the satellite data, e.g. ``'noaa-goes16'``.
    year : str or list of str
        Year(s), e.g. ``['2017', '2018']`` or ``"2018"``.
    month : str or list of str
        Month(s), e.g. ``['03', '04']`` or ``"03"``.
    day : str or list of str
        Day(s) of the month, e.g. ``['10', '20', '30']`` or ``"20"``.
    hour : str or list of str
        Hour(s) in UTC, e.g. ``['06', '12', '18']`` or ``"06"``.
    product : str or list of str
        ABI product name(s), e.g. ``["ABI-L2-CMIPF"]`` or ``"ABI-L1b-RadF"``.
    channel : str or list of str
        ABI channel(s), e.g. ``['01', '02']`` or ``"13"``. Only consulted for
        products whose name minus its last character is ``"ABI-L1b-Rad"`` or
        ``"ABI-L2-CMIP"``; ignored for every other product.

    Returns
    -------
    int
        Always ``0``.
    """
    # boto3/botocore are imported directly: the goes_ortho package __init__
    # (as of this commit) only declares __all__ and does not re-export them,
    # so ``from goes_ortho import boto3`` cannot resolve.
    import boto3
    from botocore import UNSIGNED
    from botocore.client import Config

    from goes_ortho import checkData
    from goes_ortho.utils import ProgressPercentage, __isAList

    # Products (name without the trailing scan-sector letter) that are split
    # per channel on disk.
    channel_products = ("ABI-L1b-Rad", "ABI-L2-CMIP")

    # Unsigned requests: no AWS credentials are sent.
    s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
    goes_bucket = s3.Bucket(bucket)

    def _fetch(obj, path, filename):
        """Download *obj* to ``path + filename`` unless a same-sized copy exists."""
        if checkData.checkFiles(path, filename) and checkData.checkSize(
            path, filename, obj.size
        ):
            return
        goes_bucket.download_file(
            obj.key,
            path + filename,
            Callback=ProgressPercentage(filename, obj.size),
        )

    # NOTE(review): __isAList presumably normalises scalars to lists and fills
    # julianDay with day-of-year values matching `day` -- confirm in utils.
    year, month, day, product, hour, channel, julianDay = __isAList(
        year, month, day, product, hour, channel, julianDay=""
    )

    # NOTE(review): `days` is deliberately left un-reset between (year, month)
    # iterations to preserve the original behaviour; as a consequence only the
    # first (year, month) pair walks the day list. Confirm whether that is
    # intended before changing it, since julianDay is indexed by the same
    # counter.
    days = 0
    for y in year:
        for mth in month:
            while days < len(day) and days < len(julianDay):
                for prod in product:
                    print("Downloading... the product %s " % prod)
                    uses_channels = prod[:-1] in channel_products
                    for nindex in hour:
                        print("Downloading dataset to... {0} UTC".format(nindex))
                        # List every key in the bucket under this prefix.
                        objs = goes_bucket.objects.filter(
                            Delimiter="/",
                            Prefix="{0}/{1}/{2}/{3}/".format(
                                prod, y, julianDay[days], nindex
                            ),
                        )

                        for obj in objs:
                            # The key is "<prefix>/<filename>"; keep the filename.
                            filename = obj.key.rsplit("/", 1)[1]

                            if uses_channels:
                                for ch in channel:
                                    # Only files whose name contains the
                                    # requested channel string are fetched.
                                    if filename.partition(ch)[1] == ch:
                                        path = checkData.createPathGoesData(
                                            home,
                                            bucket,
                                            y,
                                            mth,
                                            day[days],
                                            prod,
                                            nindex,
                                            ch,
                                        )
                                        _fetch(obj, path, filename)
                            else:
                                path = checkData.createPathGoesData(
                                    home, bucket, y, mth, day[days], prod, nindex
                                )
                                _fetch(obj, path, filename)
                days += 1

    return 0


def GLM_Downloader(home, bucket, year, month, day, hour):
    """Download GLM-L2-LCFA files from a GOES bucket on S3 into a local directory tree.

    Objects are listed under the prefix ``GLM-L2-LCFA/<year>/<julianDay>/<hour>/``
    and each one is downloaded unless a local file with the same name and size
    already exists.

    Parameters
    ----------
    home : str
        Directory under which the GLM products are stored.
    bucket : str
        Name of the S3 bucket holding the satellite data, e.g. ``'noaa-goes16'``.
    year : list of str
        Year(s), e.g. ``['2017', '2018']``.
    month : list of str
        Month(s), e.g. ``['03', '04']``.
    day : list of str
        Day(s) of the month, e.g. ``['10', '20', '30']``.
    hour : list of str
        Hour(s) in UTC, e.g. ``['06', '12', '18']``.

    Returns
    -------
    int
        Always ``0``.
    """
    # boto3/botocore are imported directly: the goes_ortho package __init__
    # (as of this commit) only declares __all__ and does not re-export them,
    # so ``from goes_ortho import boto3`` cannot resolve.
    import boto3
    from botocore import UNSIGNED
    from botocore.client import Config

    from goes_ortho import checkData
    from goes_ortho.utils import ProgressPercentage, __isAList

    # Unsigned requests: no AWS credentials are sent.
    s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
    goes_bucket = s3.Bucket(bucket)

    def _fetch(obj, path, filename):
        """Download *obj* to ``path + filename`` unless a same-sized copy exists."""
        if checkData.checkFiles(path, filename) and checkData.checkSize(
            path, filename, obj.size
        ):
            return
        goes_bucket.download_file(
            obj.key,
            path + filename,
            Callback=ProgressPercentage(filename, obj.size),
        )
        # Blank line after each transfer, as the original did after a download.
        print("\n")

    # NOTE(review): __isAList presumably normalises scalars to lists and fills
    # julianDay with day-of-year values matching `day` -- confirm in utils.
    year, month, day, product, hour, julianDay = __isAList(
        year, month, day, "GLM-L2-LCFA", hour, julianDay=""
    )

    # NOTE(review): `days` is deliberately left un-reset between (year, month)
    # iterations to preserve the original behaviour; only the first
    # (year, month) pair walks the day list. Confirm intent before changing.
    days = 0
    for y in year:
        for mth in month:
            while days < len(day) and days < len(julianDay):
                for prod in product:
                    print("Downloading... the product %s " % prod)
                    for nindex in hour:
                        print("Downloading... the dataset from {0} UTC".format(nindex))
                        # List every key in the bucket under this prefix.
                        objs = goes_bucket.objects.filter(
                            Delimiter="/",
                            Prefix="{0}/{1}/{2}/{3}/".format(
                                prod, y, julianDay[days], nindex
                            ),
                        )

                        for obj in objs:
                            # The key is "<prefix>/<filename>"; keep the filename.
                            filename = obj.key.rsplit("/", 1)[1]
                            pathFile = checkData.createPathGoesData(
                                home, bucket, y, mth, day[days], prod, nindex
                            )
                            _fetch(obj, pathFile, filename)

                days += 1

    return 0
13 changes: 12 additions & 1 deletion src/goes_ortho/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
__all__ = ["clip", "geometry", "get_data", "orthorectify", "rad", "timeseries"]
# Submodules exported by ``from goes_ortho import *``.
__all__ = [
"clip",
"geometry",
"get_data",
"orthorectify",
"rad",
"timeseries",
"Downloader",
"checkData",
"utils",
"io",
]
122 changes: 122 additions & 0 deletions src/goes_ortho/checkData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
def createPathGoesData(home, bucket, year, month, day, product, hour, channel=None):
    """Create (if necessary) and return the local directory for a GOES download.

    The directory has the layout
    ``<home>/<satellite>/<year>/<month>/<day>/<product>/<hour>/`` with an extra
    ``<channel>/`` level when *channel* is given. ``<satellite>`` is whatever
    follows ``"noaa-"`` in *bucket* (``"noaa-goes16"`` -> ``"goes16"``); a
    bucket name that does not contain ``"noaa-"`` yields an empty component.

    Parameters
    ----------
    home : str
        Root directory for downloaded data.
    bucket : str
        S3 bucket name, e.g. ``'noaa-goes16'``.
    year, month, day, product, hour
        Path components; each is converted with ``str()``.
    channel : optional
        Extra trailing path component; omitted when ``None``.

    Returns
    -------
    str
        The directory path, always ending in ``"/"``.

    Raises
    ------
    OSError
        If the directory cannot be created (an already existing directory is
        not an error).
    """
    # Plain stdlib import: the goes_ortho package does not re-export ``os``.
    import os

    satGoesPath = bucket.partition("noaa-")[2]

    components = [home, satGoesPath, year, month, day, product, hour]
    if channel is not None:
        components.append(channel)

    pathReturn = "/".join(str(part) for part in components) + "/"
    os.makedirs(pathReturn, exist_ok=True)
    return pathReturn


def checkFiles(path, singleFile):
    """Return whether *singleFile* exists as a regular file inside *path*.

    Parameters
    ----------
    path : str
        Directory to look in, typically the value returned by
        ``createPathGoesData``.
    singleFile : str
        File name of the object in the bucket.

    Returns
    -------
    bool
        ``True`` if ``<path>/<singleFile>`` is an existing regular file,
        ``False`` otherwise.
    """
    # Plain stdlib import: the goes_ortho package does not re-export ``os``.
    import os

    return os.path.isfile("{0}/{1}".format(path, singleFile))


"""This module is responsible for checking whether files already exist in the directory created by
createPathGoesData and, if a file with the same name is present, whether it is broken."""


def checkSize(path, singleFile, singleSize):
    """Return whether the local copy of *singleFile* has the expected size.

    Parameters
    ----------
    path : str
        Directory holding the local copy, typically the value returned by
        ``createPathGoesData``.
    singleFile : str
        File name of the object in the bucket.
    singleSize : int
        Size in bytes of the object in the bucket.

    Returns
    -------
    bool
        ``True`` if ``singleFile`` exists in ``path`` and its size equals
        ``singleSize``; ``False`` if it is missing or the sizes differ.
    """
    # Plain stdlib import: the goes_ortho package does not re-export ``os``.
    import os

    # One path for both the existence and the size check, so a *path* given
    # without a trailing separator is handled the same way in both.
    target = os.path.join(path, singleFile)
    return os.path.isfile(target) and os.path.getsize(target) == singleSize


def pythonVersion():
    """Return ``True`` when running on Python 2, ``False`` on Python 3 or newer.

    The previous implementation compared ``sys.hexversion`` against
    ``0x02070FF0`` (2.7.15 final), which reported Python 2.7.16-2.7.18 as
    "newer than 2.7"; checking the major version covers every 2.x release.
    """
    import sys

    return sys.version_info[0] < 3


def setHome():
    """Return the current user's home directory as a string.

    When ``pythonVersion()`` reports a legacy interpreter the path comes from
    ``os.path.expanduser("~")``; otherwise it comes from ``pathlib.Path.home()``.
    """
    legacy_interpreter = pythonVersion()

    if not legacy_interpreter:
        # pathlib is only imported on the non-legacy branch.
        from pathlib import Path

        home_dir = Path.home()
        return str(home_dir)

    from os.path import expanduser

    return expanduser("~")
6 changes: 2 additions & 4 deletions src/goes_ortho/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
import urllib.request
from pathlib import Path

import gtsa
import xarray as xr
import zarr
from dateutil import parser, rrule
from goespy.Downloader import ABI_Downloader

import goes_ortho as go

Expand Down Expand Up @@ -58,7 +56,7 @@ def build_zarr(downloadRequest_filepath):

# start Dask cluster
print("start Dask cluster")
_ = gtsa.io.dask_start_cluster(
_ = go.io.dask_start_cluster(
workers=6,
threads=2,
open_browser=False,
Expand Down Expand Up @@ -306,7 +304,7 @@ def download_abi(downloadRequest_filepath):
if not Path.is_dir(
download_filepaths[i]
): # os.path.exists(download_filepaths[i]):
_ = ABI_Downloader(
_ = go.Downloader.ABI_Downloader(
outDir,
bucket,
this_datetime.year,
Expand Down
Loading

0 comments on commit d37a6d6

Please sign in to comment.