diff --git a/patchparser/github_parser.py b/patchparser/github_parser.py index 4e1d35b..aed2cb8 100644 --- a/patchparser/github_parser.py +++ b/patchparser/github_parser.py @@ -76,12 +76,12 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: Returns: list: List of dictionaries with desired data for project """ - + """Master list to hold information""" data = [] - + total_files_changed = len(commit_info) - + """ Enumerate through each row withiin commit_info. A row represents changed files in the commit @@ -99,19 +99,19 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: total_file_additions = row["additions"] total_file_deletions = row["deletions"] total_file_changes = row["changes"] - + """Patches are None in some instances (e.g., XLSX files)""" if raw_file_patch is not None and "patch" in row: """Find patch headers (e.g., @@ @@)""" - headers_search = re.findall(r"@@(.*?)@@", raw_file_patch) - + headers_search = re.findall(r"@@(.*?)@@", raw_file_patch) + """Cleaning the headers, found @@REPLACE_ME@@ in some random code""" headers = [] for head_row in headers_search: if '-' in head_row and '+' in head_row: headers.append(f"@@{head_row}@@") total_patches = len(headers) - + for index, header in enumerate(headers): patch_number = index if header == None: @@ -119,10 +119,13 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: else: """Get line numbers changed for original code""" try: - original_lines = re.search(f"@@ -(.*?) \+", header).group(1) + original_lines = re.search( + f"@@ -(.*?) \+", header).group(1) if "," in original_lines: - original_line_start = int(original_lines.split(",")[0]) - original_line_length = int(original_lines.split(",")[1]) + original_line_start = int( + original_lines.split(",")[0]) + original_line_length = int( + original_lines.split(",")[1]) else: """This occus for added txt files where the total length is 1: appears as @@ -A -B @@""" original_line_start = int(original_lines) @@ -130,38 +133,42 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: original_line_end = original_line_start + original_line_length - 1 except Exception as e: print(f"Error on line 133 of github_parser: {str(e)}") - + try: """Get line numbers changed for modified code""" - modified_lines = re.search(f" \+(.*) @@", header).group(1) + modified_lines = re.search( + f" \+(.*) @@", header).group(1) if "," in modified_lines: - modified_line_start = int(modified_lines.split(",")[0]) - modified_line_length = int(modified_lines.split(",")[1]) + modified_line_start = int( + modified_lines.split(",")[0]) + modified_line_length = int( + modified_lines.split(",")[1]) else: """This occurs for added binary files the header will appear as @@ -A,X -B @@""" modified_line_start = int(modified_lines) modified_line_length = int(modified_lines) - + modified_line_end = modified_line_start + modified_line_length - 1 except Exception as e: print(f"Error on line 148 of github_parser: {str(e)}") - + """Check if length of index is equal to last patch, if so read to end of raw_patch""" if index + 1 == len(headers): - raw_patch = raw_file_patch[raw_file_patch.find(headers[index])+len(headers[index]):] + raw_patch = raw_file_patch[raw_file_patch.find( + headers[index])+len(headers[index]):] else: - raw_patch = raw_file_patch[raw_file_patch.find(headers[index])+len(headers[index]):raw_file_patch.find(headers[index+1])] + raw_patch = raw_file_patch[raw_file_patch.find( + headers[index])+len(headers[index]):raw_file_patch.find(headers[index+1])] - """Call the function to help parse the patch to get data""" patch_parse = parse_raw_patch(raw_patch) - - """Create a temporary class to hold the parsed patch data""" + + """Create a temporary class to hold the parsed patch data""" temp_parsed_commit = CommitParse(parsed_commit.repo_owner, parsed_commit.repo_name, parsed_commit.sha, parsed_commit.commit_exist) - + """Set various values""" temp_parsed_commit.message = parsed_commit.message temp_parsed_commit.file_name = file_name @@ -203,7 +210,7 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: temp_parsed_commit.commit_verification_verified = parsed_commit.commit_verification_verified temp_parsed_commit.commit_verification_reason = parsed_commit.commit_verification_reason temp_parsed_commit.parents = parsed_commit.parents - + """Append the class as a dictionary to the data list""" data.append(temp_parsed_commit.__dict__) else: @@ -212,7 +219,7 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: parsed_commit.repo_name, parsed_commit.sha, parsed_commit.commit_exist) - + temp_parsed_commit.message = parsed_commit.message temp_parsed_commit.file_name = file_name temp_parsed_commit.file_number = file_number @@ -236,16 +243,16 @@ def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list: temp_parsed_commit.commit_verification_verified = parsed_commit.commit_verification_verified temp_parsed_commit.commit_verification_reason = parsed_commit.commit_verification_reason temp_parsed_commit.parents = parsed_commit.parents - + """Append the class as a dictionary to the data list""" data.append(temp_parsed_commit.__dict__) - + if len(data) == 0: data.append(parsed_commit.__dict__) return data else: return data - + def parse_raw_patch(temp_raw_patch: str) -> dict: """Parses a single raw patch into original code and modified code @@ -256,19 +263,19 @@ def parse_raw_patch(temp_raw_patch: str) -> dict: Returns: dict: Simple dictionary with various key values for parsing the raw patch """ - + """Split the code so we can parse line by line""" split_code = temp_raw_patch.splitlines() - + """Create placeholders for desired values""" original_code = [] modified_code = [] - + additions = 0 added_code = [] deletions = 0 deleted_code = [] - + """Loop through each line of code to parse it""" for line in split_code: """[1:] is due to the spaces added from the git diff for +/- indicators in str""" @@ -286,28 +293,28 @@ def parse_raw_patch(temp_raw_patch: str) -> dict: """Add any unchanged lines to original/modified code""" original_code.append(line[1:]) modified_code.append(line[1:]) - + original_code_str = "\n".join(original_code) modified_code_str = "\n".join(modified_code) added_code_str = "\n".join(added_code) deleted_code_str = "\n".join(deleted_code) changes = additions + deletions - + """Create a simple patch to return""" patch_parse = dict( - original_code = original_code_str, - modified_code = modified_code_str, - additions = additions, - added_code = added_code_str, - deletions = deletions, - deleted_code = deleted_code_str, - changes = changes + original_code=original_code_str, + modified_code=modified_code_str, + additions=additions, + added_code=added_code_str, + deletions=deletions, + deleted_code=deleted_code_str, + changes=changes ) - + return patch_parse -def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verbose=False) -> list: +def commit(repo_owner: str, repo_name: str, sha: str, github_token=False, verbose=False) -> list: """Pass the GitHub repo_owner, repo_name, and associated commit to parse. Args: @@ -320,17 +327,18 @@ def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verb Returns: list: List of dictionaries strcutred around the class CommitParse """ - + """Commit info API URL""" url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}" - + GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') - - headers = {'Authorization': 'token %s' % GITHUB_TOKEN} - + + headers = {'Authorization': 'Bearer %s' % GITHUB_TOKEN} + """Smart GitHub rate manager""" - github_helper.smart_limit(verbose=verbose) - + github_helper.smart_limit(token=GITHUB_TOKEN, + verbose=verbose) + """Get the response""" if verbose: print(f"Starting request: {url}") @@ -338,20 +346,20 @@ def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verb response.close() if verbose: print(f"Request Complete: {url}") - + """Convert to json""" commit_info = response.json() - + """Confirm the commit exists""" if "sha" in commit_info: commit_exist = True """Initialize a CommitParse to hold data""" - parsed_commit = CommitParse(repo_owner=repo_owner, + parsed_commit = CommitParse(repo_owner=repo_owner, repo_name=repo_name, sha=commit_info["sha"], commit_exist=commit_exist) - + """Add commit message""" parsed_commit.message = commit_info["commit"]["message"] parsed_commit.commit_author_name = commit_info["commit"]["author"]["name"] @@ -366,31 +374,31 @@ def commit(repo_owner: str, repo_name: str, sha: str, github_token = False, verb parsed_commit.commit_committer_date = commit_info["commit"]["committer"]["date"] parsed_commit.commit_tree_sha = commit_info["commit"]["tree"]["sha"] parsed_commit.commit_tree_url = commit_info["commit"]["tree"]["url"] - parsed_commit.commit_verification_verified = commit_info["commit"]["verification"]["verified"] + parsed_commit.commit_verification_verified = commit_info[ + "commit"]["verification"]["verified"] parsed_commit.commit_verification_reason = commit_info["commit"]["verification"]["reason"] parsed_commit.parents = [z["sha"] for z in commit_info["parents"]] - + """Parse the files""" parsed_files = parse_commit_info(commit_info["files"], parsed_commit) if verbose: print(f"Parsing commit complete: {commit_info['sha']}") - + return parsed_files else: """Handles commit errors, e.g., repo was deleted""" if verbose: print(f"\nIssue with request:\n{response.json()}") commit_exist = False - parsed_commit = CommitParse(repo_owner=repo_owner, + parsed_commit = CommitParse(repo_owner=repo_owner, repo_name=repo_name, sha=sha, commit_exist=commit_exist) - + return [parsed_commit.__dict__] - - - + + def raw_commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> dict: """Pass the GitHub repo_owner, repo_name, and associated commit to parse. @@ -402,22 +410,23 @@ def raw_commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> dict Returns: dict: Raw commit response """ - + """Commit info API URL""" url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}" - + GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') - - headers = {'Authorization': 'token %s' % GITHUB_TOKEN} - + + headers = {'Authorization': 'Bearer %s' % GITHUB_TOKEN} + """Smart GitHub rate manager""" - github_helper.smart_limit(verbose=verbose) - + github_helper.smart_limit(token=GITHUB_TOKEN, + verbose=verbose) + """Get the response""" response = requests.get(url, headers=headers) response.close() - + """Convert to json""" commit_info = response.json() - - return commit_info \ No newline at end of file + + return commit_info diff --git a/patchparser/utils/github_helper.py b/patchparser/utils/github_helper.py index fdcea73..e3e92e0 100644 --- a/patchparser/utils/github_helper.py +++ b/patchparser/utils/github_helper.py @@ -4,36 +4,41 @@ import datetime import time import requests -import os -# LOAD the GitHub Token from an environment variable -GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') - -def github_rate_limit(): +def github_rate_limit(token: str): """Obtains the remaining rate limit for your token + Args: + token (str): API Token + Returns: json: Response of GitHub rate limit """ - headers = {'Authorization': 'token %s' % GITHUB_TOKEN} + headers = {'Authorization': 'Bearer %s' % token} url = "https://api.github.com/rate_limit" response = requests.get(url, headers=headers) response.close() return response.json() -def smart_limit(verbose=False): +def smart_limit(token: str, verbose=False): """ - Handles the GitHub rate limit issues + Handles the rate limit issues + + Args: + token (str): API Token + + Returns: + json: Response of GitHub rate limit """ - rate = github_rate_limit() + rate = github_rate_limit(token=token) rate_limit_remaining = rate['rate']['remaining'] reset = datetime.datetime.fromtimestamp(rate["rate"]["reset"]) if verbose: print(f"Rate Limit Remaining: {rate_limit_remaining} | " - f"Reset: {reset} | " - f"Current time: {datetime.datetime.now()}") + f"Reset: {reset} | " + f"Current time: {datetime.datetime.now()}") """Handles rate limit issues""" if rate_limit_remaining <= 50: @@ -44,4 +49,4 @@ def smart_limit(verbose=False): """Sleep until rate limit reset...add 30 seconds to be safe""" for i in reversed(range(0, time_until_reset.seconds, 60)): print(f"Sleep state remaining: {i} seconds.") - time.sleep(60) \ No newline at end of file + time.sleep(60) diff --git a/tests/test_github_parser.py b/tests/test_github_parser.py index 0022344..8f69178 100644 --- a/tests/test_github_parser.py +++ b/tests/test_github_parser.py @@ -13,7 +13,7 @@ def test_github_api(self): repo_name="vfcfinder", sha="f573763decf499349721c48f11dc8299a91255d1", verbose=True) - + """Expecting 5 changes from the above commit""" self.assertEqual(len(parsed), 9)