-
-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #101 from Darylgolden/add-github-source
Add GitHub repository tracking
- Loading branch information
Showing
3 changed files
with
123 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
LICENSE_TYPE,Repository Count | ||
CC0-1.0,239474 | ||
CC-BY-4.0,89918 | ||
CC-BY-SA-4.0,23318 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# Standard library | ||
import os | ||
import os.path | ||
import sys | ||
import traceback | ||
|
||
# Third-party | ||
import requests | ||
from requests.adapters import HTTPAdapter | ||
from urllib3.util.retry import Retry | ||
|
||
sys.path.append(".") | ||
# First-party/Local | ||
import quantify # noqa: E402 | ||
|
||
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY = quantify.setup( | ||
__file__ | ||
) | ||
|
||
DATA_WRITE_FILE = os.path.join( | ||
PATH_WORK_DIR, | ||
f"data_github_" | ||
f"{DATETIME_TODAY.year}_{DATETIME_TODAY.month}_{DATETIME_TODAY.day}.csv", | ||
) | ||
|
||
|
||
def set_up_data_file(): | ||
"""Writes the header row of the data file.""" | ||
header_title = "LICENSE_TYPE,Repository Count" | ||
with open(DATA_WRITE_FILE, "w") as f: | ||
f.write(f"{header_title}\n") | ||
|
||
|
||
def get_response_elems(license): | ||
"""Provides the metadata for query of specified parameters | ||
Args: | ||
license: | ||
A string representing the type of license, and should be a segment | ||
of its URL towards the license description. Alternatively, the | ||
default None value stands for having no assumption about license | ||
type. | ||
Returns: | ||
dict: A dictionary mapping metadata to its value provided from the API | ||
query of specified parameters. | ||
""" | ||
try: | ||
base_url = "https://api.github.com/search/repositories?q=license:" | ||
request_url = f"{base_url}{license}" | ||
max_retries = Retry( | ||
total=5, | ||
backoff_factor=10, | ||
status_forcelist=[403, 408, 429, 500, 502, 503, 504], | ||
) | ||
session = requests.Session() | ||
session.mount("https://", HTTPAdapter(max_retries=max_retries)) | ||
with session.get(request_url) as response: | ||
response.raise_for_status() | ||
search_data = response.json() | ||
return {"totalResults": search_data["total_count"]} | ||
except Exception as e: | ||
raise e | ||
|
||
|
||
def record_license_data(license_type): | ||
"""Writes the row for LICENSE_TYPE to file to contain Github Query data. | ||
Args: | ||
license_type: | ||
A string representing the type of license, and should be a segment | ||
of its URL towards the license description. Alternatively, the | ||
default None value stands for having no assumption about license | ||
type. | ||
""" | ||
data_log = ( | ||
f"{license_type}," | ||
f"{get_response_elems(license_type)['totalResults']}" | ||
) | ||
with open(DATA_WRITE_FILE, "a") as f: | ||
f.write(f"{data_log}\n") | ||
|
||
|
||
def record_all_licenses(): | ||
"""Records the data of all license types findable in the license list and | ||
records these data into the DATA_WRITE_FILE as specified in that constant. | ||
""" | ||
licenses = ["CC0-1.0", "CC-BY-4.0", "CC-BY-SA-4.0"] | ||
for license in licenses: | ||
record_license_data(license) | ||
|
||
|
||
def main(): | ||
set_up_data_file() | ||
record_all_licenses() | ||
|
||
|
||
if __name__ == "__main__": | ||
try: | ||
main() | ||
except SystemExit as e: | ||
sys.exit(e.code) | ||
except KeyboardInterrupt: | ||
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr) | ||
sys.exit(130) | ||
except Exception: | ||
print("ERROR (1) Unhandled exception:", file=sys.stderr) | ||
print(traceback.print_exc(), file=sys.stderr) | ||
sys.exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters