Skip to content

Commit

Permalink
Merge pull request #101 from Darylgolden/add-github-source
Browse files Browse the repository at this point in the history
Add GitHub repository tracking
  • Loading branch information
TimidRobot authored Apr 5, 2024
2 parents 8412423 + 5e0475c commit 5b4f4ae
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 0 deletions.
4 changes: 4 additions & 0 deletions github/data_github_2024_4_5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
LICENSE_TYPE,Repository Count
CC0-1.0,239474
CC-BY-4.0,89918
CC-BY-SA-4.0,23318
106 changes: 106 additions & 0 deletions github/github_scratcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Standard library
import os
import os.path
import sys
import traceback

# Third-party
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

sys.path.append(".")
# First-party/Local
import quantify # noqa: E402

PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY = quantify.setup(
__file__
)

DATA_WRITE_FILE = os.path.join(
PATH_WORK_DIR,
f"data_github_"
f"{DATETIME_TODAY.year}_{DATETIME_TODAY.month}_{DATETIME_TODAY.day}.csv",
)


def set_up_data_file():
"""Writes the header row of the data file."""
header_title = "LICENSE_TYPE,Repository Count"
with open(DATA_WRITE_FILE, "w") as f:
f.write(f"{header_title}\n")


def get_response_elems(license):
"""Provides the metadata for query of specified parameters
Args:
license:
A string representing the type of license, and should be a segment
of its URL towards the license description. Alternatively, the
default None value stands for having no assumption about license
type.
Returns:
dict: A dictionary mapping metadata to its value provided from the API
query of specified parameters.
"""
try:
base_url = "https://api.github.com/search/repositories?q=license:"
request_url = f"{base_url}{license}"
max_retries = Retry(
total=5,
backoff_factor=10,
status_forcelist=[403, 408, 429, 500, 502, 503, 504],
)
session = requests.Session()
session.mount("https://", HTTPAdapter(max_retries=max_retries))
with session.get(request_url) as response:
response.raise_for_status()
search_data = response.json()
return {"totalResults": search_data["total_count"]}
except Exception as e:
raise e


def record_license_data(license_type):
"""Writes the row for LICENSE_TYPE to file to contain Github Query data.
Args:
license_type:
A string representing the type of license, and should be a segment
of its URL towards the license description. Alternatively, the
default None value stands for having no assumption about license
type.
"""
data_log = (
f"{license_type},"
f"{get_response_elems(license_type)['totalResults']}"
)
with open(DATA_WRITE_FILE, "a") as f:
f.write(f"{data_log}\n")


def record_all_licenses():
"""Records the data of all license types findable in the license list and
records these data into the DATA_WRITE_FILE as specified in that constant.
"""
licenses = ["CC0-1.0", "CC-BY-4.0", "CC-BY-SA-4.0"]
for license in licenses:
record_license_data(license)


def main():
set_up_data_file()
record_all_licenses()


if __name__ == "__main__":
try:
main()
except SystemExit as e:
sys.exit(e.code)
except KeyboardInterrupt:
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
sys.exit(130)
except Exception:
print("ERROR (1) Unhandled exception:", file=sys.stderr)
print(traceback.print_exc(), file=sys.stderr)
sys.exit(1)
13 changes: 13 additions & 0 deletions sources.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ The Flickr Developer Guide](https://www.flickr.com/services/developer/))
- Query limit: 3600 requests per hour
- Data available through CSV format

## GitHub

**Description:** A development platform for hosting and managing code.

**API documentation link:**
- [GitHub REST API v3](https://docs.github.com/en/rest)

**API information:**
- API key not required but recommended by GitHub
- Query limit: 60 requests per hour if unauthenticated,
5000 requests per hour if authenticated
- Data available through JSON format


## Google Custom Search JSON API

Expand Down

0 comments on commit 5b4f4ae

Please sign in to comment.