Skip to content

Commit

Permalink
Allow for GitHub Auth with Tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
tdunlap607 committed Sep 26, 2023
1 parent 5aa2a4f commit 6037102
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 62 deletions.
200 changes: 143 additions & 57 deletions patchparser/github_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
"""
import re
import requests
from patchparser.utils import github_helper
import os


class CommitParse:
def __init__(self, repo_owner: str, repo_name: bool, sha: str) -> object:
def __init__(self, repo_owner: str, repo_name: bool, sha: str, commit_exist: bool) -> object:
"""Initialize a class to hold the data for parsing the commit data
Args:
repo_owner (str): Repo owner
repo_name (str): Repo name
sha (str): Target commit SHA
commit_exist (bool): Value from API response
Returns:
object: CommitParse
Expand Down Expand Up @@ -60,6 +63,7 @@ def __init__(self, repo_owner: str, repo_name: bool, sha: str) -> object:
self.commit_verification_verified = None
self.commit_verification_reason = None
self.parents = None
self.commit_exist = commit_exist


def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:
Expand All @@ -86,52 +90,61 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:
file_name = row["filename"]
file_number = index
file_extension = file_name.split(".")[-1]
raw_file_patch = row["patch"]
"""Not all will have patches. E.g., PDF files"""
if "patch" in row:
raw_file_patch = row["patch"]
else:
raw_file_patch = None
status = row["status"]
total_file_additions = row["additions"]
total_file_deletions = row["deletions"]
total_file_changes = row["changes"]

"""Patches are None in some instances (e.g., XLSX files)"""
if raw_file_patch is not None:
if raw_file_patch is not None and "patch" in row:
"""Find patch headers (e.g., @@ @@)"""
headers_search = re.findall(r"@@(.*?)@@", raw_file_patch)

"""Cleaning the headers, found @@REPLACE_ME@@ in some random code"""
headers = []
for head_row in headers_search:
if '-' in head_row and '+' in head_row:
# get the original line headers
original_header_lines = re.search(f"@@ -(.*?) \+", f"@@{head_row}@@").group(1)
# make sure the header is of type int
if original_header_lines.split(',')[-1].isdigit():
headers.append(f"@@{head_row}@@")
headers.append(f"@@{head_row}@@")
total_patches = len(headers)

for index, header in enumerate(headers):
patch_number = index
"""Get line numbers changed for original code"""
original_lines = re.search(f"@@ -(.*?) \+", header).group(1)
if "," in original_lines:
original_line_start = int(original_lines.split(",")[0])
original_line_length = int(original_lines.split(",")[1])
else:
"""This occus for added txt files where the total length is 1: appears as @@ -A -B @@"""
original_line_start = int(original_lines)
original_line_length = int(original_lines)
original_line_end = original_line_start + original_line_length - 1

"""Get line numbers changed for modified code"""
modified_lines = re.search(f" \+(.*) @@", header).group(1)
if "," in modified_lines:
modified_line_start = int(modified_lines.split(",")[0])
modified_line_length = int(modified_lines.split(",")[1])
if header == None:
pass
else:
"""This occurs for added binary files the header will appear as @@ -A,X -B @@"""
modified_line_start = int(modified_lines)
modified_line_length = int(modified_lines)
"""Get line numbers changed for original code"""
try:
original_lines = re.search(f"@@ -(.*?) \+", header).group(1)
if "," in original_lines:
original_line_start = int(original_lines.split(",")[0])
original_line_length = int(original_lines.split(",")[1])
else:
"""This occus for added txt files where the total length is 1: appears as @@ -A -B @@"""
original_line_start = int(original_lines)
original_line_length = int(original_lines)
original_line_end = original_line_start + original_line_length - 1
except Exception as e:
print(f"Error on line 133 of github_parser: {str(e)}")

modified_line_end = modified_line_start + modified_line_length - 1
try:
"""Get line numbers changed for modified code"""
modified_lines = re.search(f" \+(.*) @@", header).group(1)
if "," in modified_lines:
modified_line_start = int(modified_lines.split(",")[0])
modified_line_length = int(modified_lines.split(",")[1])
else:
"""This occurs for added binary files the header will appear as @@ -A,X -B @@"""
modified_line_start = int(modified_lines)
modified_line_length = int(modified_lines)

modified_line_end = modified_line_start + modified_line_length - 1
except Exception as e:
print(f"Error on line 148 of github_parser: {str(e)}")

"""Check if length of index is equal to last patch, if so read to end of raw_patch"""
if index + 1 == len(headers):
Expand All @@ -146,7 +159,8 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:
"""Create a temporary class to hold the parsed patch data"""
temp_parsed_commit = CommitParse(parsed_commit.repo_owner,
parsed_commit.repo_name,
parsed_commit.sha)
parsed_commit.sha,
parsed_commit.commit_exist)

"""Set various values"""
temp_parsed_commit.message = parsed_commit.message
Expand Down Expand Up @@ -196,7 +210,8 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:
"""Sometimes patch is None (e.g., XLSX files)"""
temp_parsed_commit = CommitParse(parsed_commit.repo_owner,
parsed_commit.repo_name,
parsed_commit.sha)
parsed_commit.sha,
parsed_commit.commit_exist)

temp_parsed_commit.message = parsed_commit.message
temp_parsed_commit.file_name = file_name
Expand Down Expand Up @@ -224,8 +239,12 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:

"""Append the class as a dictionary to the data list"""
data.append(temp_parsed_commit.__dict__)

return data

if len(data) == 0:
data.append(parsed_commit.__dict__)
return data
else:
return data


def parse_raw_patch(temp_raw_patch: str) -> dict:
Expand Down Expand Up @@ -288,13 +307,15 @@ def parse_raw_patch(temp_raw_patch: str) -> dict:
return patch_parse


def commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> list:
def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verbose=False) -> list:
"""Pass the GitHub repo_owner, repo_name, and associated commit to parse.
Args:
repo_owner (str): Target repo owner
repo_name (str): Target repo name
commit_sha (str): Target commit SHA from GitHub
github_token (bool): ENV GitHub Token has been set
verbose (bool): Print aspect of process
Returns:
list: List of dictionaries strcutred around the class CommitParse
Expand All @@ -303,35 +324,100 @@ def commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> list:
"""Commit info API URL"""
url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}"

GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')

headers = {'Authorization': 'token %s' % GITHUB_TOKEN}

"""Smart GitHub rate manager"""
github_helper.smart_limit(verbose=verbose)

"""Get the response"""
response = requests.get(url)
if verbose:
print(f"Starting request: {url}")
response = requests.get(url, headers=headers)
response.close()
if verbose:
print(f"Request Complete: {url}")

"""Convert to json"""
commit_info = response.json()

"""Initialize a CommitParse to hold data"""
parsed_commit = CommitParse(repo_owner=repo_owner,
repo_name=repo_name,
sha=commit_info["sha"])
"""Confirm the commit exists"""
if "sha" in commit_info:
commit_exist = True

"""Initialize a CommitParse to hold data"""
parsed_commit = CommitParse(repo_owner=repo_owner,
repo_name=repo_name,
sha=commit_info["sha"],
commit_exist=commit_exist)

"""Add commit message"""
parsed_commit.message = commit_info["commit"]["message"]
parsed_commit.commit_author_name = commit_info["commit"]["author"]["name"]
if commit_info["author"] != None and len(commit_info["author"]) > 0:
parsed_commit.commit_author_login = commit_info["author"]["login"]
parsed_commit.commit_author_email = commit_info["commit"]["author"]["email"]
parsed_commit.commit_author_date = commit_info["commit"]["author"]["date"]
parsed_commit.commit_committer_name = commit_info["commit"]["committer"]["name"]
if commit_info["committer"] != None and len(commit_info["committer"]) > 0:
parsed_commit.commit_committer_login = commit_info["committer"]["login"]
parsed_commit.commit_committer_email = commit_info["commit"]["committer"]["email"]
parsed_commit.commit_committer_date = commit_info["commit"]["committer"]["date"]
parsed_commit.commit_tree_sha = commit_info["commit"]["tree"]["sha"]
parsed_commit.commit_tree_url = commit_info["commit"]["tree"]["url"]
parsed_commit.commit_verification_verified = commit_info["commit"]["verification"]["verified"]
parsed_commit.commit_verification_reason = commit_info["commit"]["verification"]["reason"]
parsed_commit.parents = [z["sha"] for z in commit_info["parents"]]

"""Parse the files"""
parsed_files = parse_commit_info(commit_info["files"], parsed_commit)

if verbose:
print(f"Parsing commit complete: {commit_info['sha']}")

return parsed_files
else:
"""Handles commit errors, e.g., repo was deleted"""
if verbose:
print(f"\nIssue with request:\n{response.json()}")
commit_exist = False
parsed_commit = CommitParse(repo_owner=repo_owner,
repo_name=repo_name,
sha=sha,
commit_exist=commit_exist)

return [parsed_commit.__dict__]



def raw_commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> dict:
"""Pass the GitHub repo_owner, repo_name, and associated commit to parse.
Args:
repo_owner (str): Target repo owner
repo_name (str): Target repo name
commit_sha (str): Target commit SHA from GitHub
Returns:
dict: Raw commit response
"""

"""Commit info API URL"""
url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}"

"""Add commit message"""
parsed_commit.message = commit_info["commit"]["message"]
parsed_commit.commit_author_name = commit_info["commit"]["author"]["name"]
parsed_commit.commit_author_login = commit_info["author"]["login"]
parsed_commit.commit_author_email = commit_info["commit"]["author"]["email"]
parsed_commit.commit_author_date = commit_info["commit"]["author"]["date"]
parsed_commit.commit_committer_name = commit_info["commit"]["committer"]["name"]
parsed_commit.commit_committer_login = commit_info["committer"]["login"]
parsed_commit.commit_committer_email = commit_info["commit"]["committer"]["email"]
parsed_commit.commit_committer_date = commit_info["commit"]["committer"]["date"]
parsed_commit.commit_tree_sha = commit_info["commit"]["tree"]["sha"]
parsed_commit.commit_tree_url = commit_info["commit"]["tree"]["url"]
parsed_commit.commit_verification_verified = commit_info["commit"]["verification"]["verified"]
parsed_commit.commit_verification_reason = commit_info["commit"]["verification"]["reason"]
parsed_commit.parents = [z["sha"] for z in commit_info["parents"]]
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')

"""Parse the files"""
parsed_files = parse_commit_info(commit_info["files"], parsed_commit)

return parsed_files
headers = {'Authorization': 'token %s' % GITHUB_TOKEN}

"""Smart GitHub rate manager"""
github_helper.smart_limit(verbose=verbose)

"""Get the response"""
response = requests.get(url, headers=headers)
response.close()

"""Convert to json"""
commit_info = response.json()

return commit_info
1 change: 1 addition & 0 deletions patchparser/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from patchparser.utils import github_helper
47 changes: 47 additions & 0 deletions patchparser/utils/github_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Helper functions to interact with GitHub
"""
import datetime
import time
import requests
import os

# LOAD the GitHub Token from an environment variable
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')


def github_rate_limit():
    """Obtains the remaining rate limit for your token

    The request is authenticated with the module-level ``GITHUB_TOKEN`` when
    one was loaded from the environment. When no token is configured the
    request is sent without an ``Authorization`` header instead of with the
    literal credential ``token None``.

    Returns:
        json: Response of GitHub rate limit
    """
    # Only attach credentials when a token is actually configured; formatting
    # None into the header would send an invalid credential.
    headers = {'Authorization': 'token %s' % GITHUB_TOKEN} if GITHUB_TOKEN else {}
    url = "https://api.github.com/rate_limit"
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    try:
        return response.json()
    finally:
        # Release the connection whether or not the body decoded cleanly.
        response.close()


def smart_limit(verbose=False):
    """
    Handles the GitHub rate limit issues

    Reads the current rate-limit status via ``github_rate_limit()`` and, when
    50 or fewer requests remain, sleeps until the reported reset time plus a
    30 second safety margin, printing a countdown roughly once a minute.

    Args:
        verbose (bool): Print the remaining quota, reset time and current time
    """
    rate = github_rate_limit()
    rate_limit_remaining = rate['rate']['remaining']
    reset = datetime.datetime.fromtimestamp(rate["rate"]["reset"])
    if verbose:
        print(f"Rate Limit Remaining: {rate_limit_remaining} | "
              f"Reset: {reset} | "
              f"Current time: {datetime.datetime.now()}")

    # Handles rate limit issues
    if rate_limit_remaining <= 50:
        # total_seconds() keeps the sign and the day component of the delta.
        # timedelta.seconds would turn a reset time that has already passed
        # into a wait of almost 24 hours, so clamp at zero instead.
        time_until_reset = reset - datetime.datetime.now()
        seconds_until_reset = max(0, int(time_until_reset.total_seconds()))
        print(f"Seconds until reset: {seconds_until_reset}")
        print(f"Starting sleep countdown now: {datetime.datetime.now()}")

        # Sleep until rate limit reset...add 30 seconds to be safe
        remaining = seconds_until_reset + 30
        while remaining > 0:
            print(f"Sleep state remaining: {remaining} seconds.")
            # Sleep in chunks of at most a minute so the countdown stays live
            # and the total never overshoots the computed wait.
            nap = min(60, remaining)
            time.sleep(nap)
            remaining -= nap
11 changes: 6 additions & 5 deletions tests/test_github_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ def test_github_api(self):
Testing GitHub Parser for a given commit by using the GitHub API
Example commit: https://github.com/tdunlap607/patchparser/commit/0dfe5bacc3833160dbe3ea9edf49cd7d599ad290
"""
parsed = gp.commit(repo_owner="tdunlap607",
repo_name="patchparser",
sha="0dfe5bacc3833160dbe3ea9edf49cd7d599ad290")

parsed = gp.commit(repo_owner="s3c2",
repo_name="vfcfinder",
sha="f573763decf499349721c48f11dc8299a91255d1",
verbose=True)

"""Expecting 9 changes from the above commit"""
self.assertEqual(len(parsed), 5)
self.assertEqual(len(parsed), 9)


if __name__ == '__main__':
Expand Down

0 comments on commit 6037102

Please sign in to comment.