diff --git a/patchparser/github_parser.py b/patchparser/github_parser.py index 3c93b53..4e1d35b 100644 --- a/patchparser/github_parser.py +++ b/patchparser/github_parser.py @@ -3,16 +3,19 @@ """ import re import requests +from patchparser.utils import github_helper +import os class CommitParse: - def __init__(self, repo_owner: str, repo_name: bool, sha: str) -> object: + def __init__(self, repo_owner: str, repo_name: bool, sha: str, commit_exist: bool) -> object: """Initialize a class to hold the data for parsing the commit data Args: repo_owner (str): Repo owner repo_name (str): Repo name sha (str): Target commit SHA + commit_exist (bool): Value from API response Returns: object: CommitParse @@ -60,6 +63,7 @@ def __init__(self, repo_owner: str, repo_name: bool, sha: str) -> object: self.commit_verification_verified = None self.commit_verification_reason = None self.parents = None + self.commit_exist = commit_exist def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: @@ -86,14 +90,18 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: file_name = row["filename"] file_number = index file_extension = file_name.split(".")[-1] - raw_file_patch = row["patch"] + """Not all will have patches. E.g., PDF files""" + if "patch" in row: + raw_file_patch = row["patch"] + else: + raw_file_patch = None status = row["status"] total_file_additions = row["additions"] total_file_deletions = row["deletions"] total_file_changes = row["changes"] """Patches are None in some instances (e.g., XLSX files)""" - if raw_file_patch is not None: + if raw_file_patch is not None and "patch" in row: """Find patch headers (e.g., @@ @@)""" headers_search = re.findall(r"@@(.*?)@@", raw_file_patch) @@ -101,37 +109,42 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: headers = [] for head_row in headers_search: if '-' in head_row and '+' in head_row: - # get the original line headers - original_header_lines = re.search(f"@@ -(.*?) \+", f"@@{head_row}@@").group(1) - # make sure the header is of type int - if original_header_lines.split(',')[-1].isdigit(): - headers.append(f"@@{head_row}@@") + headers.append(f"@@{head_row}@@") total_patches = len(headers) for index, header in enumerate(headers): patch_number = index - """Get line numbers changed for original code""" - original_lines = re.search(f"@@ -(.*?) \+", header).group(1) - if "," in original_lines: - original_line_start = int(original_lines.split(",")[0]) - original_line_length = int(original_lines.split(",")[1]) - else: - """This occus for added txt files where the total length is 1: appears as @@ -A -B @@""" - original_line_start = int(original_lines) - original_line_length = int(original_lines) - original_line_end = original_line_start + original_line_length - 1 - - """Get line numbers changed for modified code""" - modified_lines = re.search(f" \+(.*) @@", header).group(1) - if "," in modified_lines: - modified_line_start = int(modified_lines.split(",")[0]) - modified_line_length = int(modified_lines.split(",")[1]) + if header == None: + pass else: - """This occurs for added binary files the header will appear as @@ -A,X -B @@""" - modified_line_start = int(modified_lines) - modified_line_length = int(modified_lines) + """Get line numbers changed for original code""" + try: + original_lines = re.search(f"@@ -(.*?) \+", header).group(1) + if "," in original_lines: + original_line_start = int(original_lines.split(",")[0]) + original_line_length = int(original_lines.split(",")[1]) + else: + """This occus for added txt files where the total length is 1: appears as @@ -A -B @@""" + original_line_start = int(original_lines) + original_line_length = int(original_lines) + original_line_end = original_line_start + original_line_length - 1 + except Exception as e: + print(f"Error on line 133 of github_parser: {str(e)}") - modified_line_end = modified_line_start + modified_line_length - 1 + try: + """Get line numbers changed for modified code""" + modified_lines = re.search(f" \+(.*) @@", header).group(1) + if "," in modified_lines: + modified_line_start = int(modified_lines.split(",")[0]) + modified_line_length = int(modified_lines.split(",")[1]) + else: + """This occurs for added binary files the header will appear as @@ -A,X -B @@""" + modified_line_start = int(modified_lines) + modified_line_length = int(modified_lines) + + modified_line_end = modified_line_start + modified_line_length - 1 + except Exception as e: + print(f"Error on line 148 of github_parser: {str(e)}") """Check if length of index is equal to last patch, if so read to end of raw_patch""" if index + 1 == len(headers): @@ -146,7 +159,8 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: """Create a temporary class to hold the parsed patch data""" temp_parsed_commit = CommitParse(parsed_commit.repo_owner, parsed_commit.repo_name, - parsed_commit.sha) + parsed_commit.sha, + parsed_commit.commit_exist) """Set various values""" temp_parsed_commit.message = parsed_commit.message @@ -196,7 +210,8 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: """Sometimes patch is None (e.g., XLSX files)""" temp_parsed_commit = CommitParse(parsed_commit.repo_owner, parsed_commit.repo_name, - parsed_commit.sha) + parsed_commit.sha, + parsed_commit.commit_exist) temp_parsed_commit.message = parsed_commit.message temp_parsed_commit.file_name = file_name @@ -224,8 +239,12 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: """Append the class as a dictionary to the data list""" data.append(temp_parsed_commit.__dict__) - - return data + + if len(data) == 0: + data.append(parsed_commit.__dict__) + return data + else: + return data def parse_raw_patch(temp_raw_patch: str) -> dict: @@ -288,13 +307,15 @@ def parse_raw_patch(temp_raw_patch: str) -> dict: return patch_parse -def commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> list: +def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verbose=False) -> list: """Pass the GitHub repo_owner, repo_name, and associated commit to parse. Args: repo_owner (str): Target repo owner repo_name (str): Target repo name commit_sha (str): Target commit SHA from GitHub + github_token (bool): ENV GitHub Token has been set + verbose (bool): Print aspect of process Returns: list: List of dictionaries strcutred around the class CommitParse @@ -303,35 +324,100 @@ def commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> list: """Commit info API URL""" url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}" + GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') + + headers = {'Authorization': 'token %s' % GITHUB_TOKEN} + + """Smart GitHub rate manager""" + github_helper.smart_limit(verbose=verbose) + """Get the response""" - response = requests.get(url) + if verbose: + print(f"Starting request: {url}") + response = requests.get(url, headers=headers) response.close() + if verbose: + print(f"Request Complete: {url}") """Convert to json""" commit_info = response.json() - """Initialize a CommitParse to hold data""" - parsed_commit = CommitParse(repo_owner=repo_owner, - repo_name=repo_name, - sha=commit_info["sha"]) + """Confirm the commit exists""" + if "sha" in commit_info: + commit_exist = True + + """Initialize a CommitParse to hold data""" + parsed_commit = CommitParse(repo_owner=repo_owner, + repo_name=repo_name, + sha=commit_info["sha"], + commit_exist=commit_exist) + + """Add commit message""" + parsed_commit.message = commit_info["commit"]["message"] + parsed_commit.commit_author_name = commit_info["commit"]["author"]["name"] + if commit_info["author"] != None and len(commit_info["author"]) > 0: + parsed_commit.commit_author_login = commit_info["author"]["login"] + parsed_commit.commit_author_email = commit_info["commit"]["author"]["email"] + parsed_commit.commit_author_date = commit_info["commit"]["author"]["date"] + parsed_commit.commit_committer_name = commit_info["commit"]["committer"]["name"] + if commit_info["committer"] != None and len(commit_info["committer"]) > 0: + parsed_commit.commit_committer_login = commit_info["committer"]["login"] + parsed_commit.commit_committer_email = commit_info["commit"]["committer"]["email"] + parsed_commit.commit_committer_date = commit_info["commit"]["committer"]["date"] + parsed_commit.commit_tree_sha = commit_info["commit"]["tree"]["sha"] + parsed_commit.commit_tree_url = commit_info["commit"]["tree"]["url"] + parsed_commit.commit_verification_verified = commit_info["commit"]["verification"]["verified"] + parsed_commit.commit_verification_reason = commit_info["commit"]["verification"]["reason"] + parsed_commit.parents = [z["sha"] for z in commit_info["parents"]] + + """Parse the files""" + parsed_files = parse_commit_info(commit_info["files"], parsed_commit) + + if verbose: + print(f"Parsing commit complete: {commit_info['sha']}") + + return parsed_files + else: + """Handles commit errors, e.g., repo was deleted""" + if verbose: + print(f"\nIssue with request:\n{response.json()}") + commit_exist = False + parsed_commit = CommitParse(repo_owner=repo_owner, + repo_name=repo_name, + sha=sha, + commit_exist=commit_exist) + + return [parsed_commit.__dict__] + + + +def raw_commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> dict: + """Pass the GitHub repo_owner, repo_name, and associated commit to parse. + + Args: + repo_owner (str): Target repo owner + repo_name (str): Target repo name + commit_sha (str): Target commit SHA from GitHub + + Returns: + dict: Raw commit response + """ + + """Commit info API URL""" + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}" - """Add commit message""" - parsed_commit.message = commit_info["commit"]["message"] - parsed_commit.commit_author_name = commit_info["commit"]["author"]["name"] - parsed_commit.commit_author_login = commit_info["author"]["login"] - parsed_commit.commit_author_email = commit_info["commit"]["author"]["email"] - parsed_commit.commit_author_date = commit_info["commit"]["author"]["date"] - parsed_commit.commit_committer_name = commit_info["commit"]["committer"]["name"] - parsed_commit.commit_committer_login = commit_info["committer"]["login"] - parsed_commit.commit_committer_email = commit_info["commit"]["committer"]["email"] - parsed_commit.commit_committer_date = commit_info["commit"]["committer"]["date"] - parsed_commit.commit_tree_sha = commit_info["commit"]["tree"]["sha"] - parsed_commit.commit_tree_url = commit_info["commit"]["tree"]["url"] - parsed_commit.commit_verification_verified = commit_info["commit"]["verification"]["verified"] - parsed_commit.commit_verification_reason = commit_info["commit"]["verification"]["reason"] - parsed_commit.parents = [z["sha"] for z in commit_info["parents"]] + GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') - """Parse the files""" - parsed_files = parse_commit_info(commit_info["files"], parsed_commit) - - return parsed_files \ No newline at end of file + headers = {'Authorization': 'token %s' % GITHUB_TOKEN} + + """Smart GitHub rate manager""" + github_helper.smart_limit(verbose=verbose) + + """Get the response""" + response = requests.get(url, headers=headers) + response.close() + + """Convert to json""" + commit_info = response.json() + + return commit_info \ No newline at end of file diff --git a/patchparser/utils/__init__.py b/patchparser/utils/__init__.py new file mode 100644 index 0000000..90c775e --- /dev/null +++ b/patchparser/utils/__init__.py @@ -0,0 +1 @@ +from patchparser.utils import github_helper \ No newline at end of file diff --git a/patchparser/utils/github_helper.py b/patchparser/utils/github_helper.py new file mode 100644 index 0000000..fdcea73 --- /dev/null +++ b/patchparser/utils/github_helper.py @@ -0,0 +1,47 @@ +""" +Helper functions to interact with GitHub +""" +import datetime +import time +import requests +import os + +# LOAD the GitHub Token from an environment variable +GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') + + +def github_rate_limit(): + """Obtains the remaining rate limit for your token + + Returns: + json: Response of GitHub rate limit + """ + headers = {'Authorization': 'token %s' % GITHUB_TOKEN} + url = "https://api.github.com/rate_limit" + response = requests.get(url, headers=headers) + response.close() + return response.json() + + +def smart_limit(verbose=False): + """ + Handles the GitHub rate limit issues + """ + rate = github_rate_limit() + rate_limit_remaining = rate['rate']['remaining'] + reset = datetime.datetime.fromtimestamp(rate["rate"]["reset"]) + if verbose: + print(f"Rate Limit Remaining: {rate_limit_remaining} | " + f"Reset: {reset} | " + f"Current time: {datetime.datetime.now()}") + + """Handles rate limit issues""" + if rate_limit_remaining <= 50: + """Get seconds until reset occurs""" + time_until_reset = reset - datetime.datetime.now() + print(f"Seconds until reset: {time_until_reset.seconds}") + print(f"Starting sleep countdown now: {datetime.datetime.now()}") + """Sleep until rate limit reset...add 30 seconds to be safe""" + for i in reversed(range(0, time_until_reset.seconds, 60)): + print(f"Sleep state remaining: {i} seconds.") + time.sleep(60) \ No newline at end of file diff --git a/tests/test_github_parser.py b/tests/test_github_parser.py index 51e3d4d..0022344 100644 --- a/tests/test_github_parser.py +++ b/tests/test_github_parser.py @@ -9,12 +9,13 @@ def test_github_api(self): Testing GitHub Parser for a given commit by using the GitHub API Example commit: https://github.com/tdunlap607/patchparser/commit/0dfe5bacc3833160dbe3ea9edf49cd7d599ad290 """ - parsed = gp.commit(repo_owner="tdunlap607", - repo_name="patchparser", - sha="0dfe5bacc3833160dbe3ea9edf49cd7d599ad290") - + parsed = gp.commit(repo_owner="s3c2", + repo_name="vfcfinder", + sha="f573763decf499349721c48f11dc8299a91255d1", + verbose=True) + """Expecting 5 changes from the above commit""" - self.assertEqual(len(parsed), 5) + self.assertEqual(len(parsed), 9) if __name__ == '__main__':