From 9ab76e756e4db1ef45808b34e01983467c56393a Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 10:16:56 -0600 Subject: [PATCH 1/8] address linting errors in data_parse.py Signed-off-by: Isaac Milarsky --- .pylintrc | 2 +- augur/api/view/routes.py | 1 + augur/api/view/utils.py | 1 + augur/application/db/data_parse.py | 301 ++++++++++++++++++++++- augur/tasks/gitlab/issues_task.py | 2 +- augur/tasks/gitlab/merge_request_task.py | 4 +- 6 files changed, 298 insertions(+), 13 deletions(-) diff --git a/.pylintrc b/.pylintrc index 0b1b7d2049..0056af873b 100644 --- a/.pylintrc +++ b/.pylintrc @@ -12,7 +12,7 @@ #refactoring checker #enable=R -disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311 +disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311,E0401 # Analyse import fallback blocks. This can be used to support both Python 2 and diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index bf6e8fc056..0c47afbc2e 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -1,4 +1,5 @@ import logging +import math from flask import Flask, render_template, render_template_string, request, abort, jsonify, redirect, url_for, session, flash from sqlalchemy.orm.exc import NoResultFound from .utils import * diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 7712873b55..043dd44831 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -1,6 +1,7 @@ from pathlib import Path from concurrent.futures import ThreadPoolExecutor from flask import render_template, flash, url_for, Flask +from .init import init_logging from .init import * from ..server import app, db_session from augur.application.config import AugurConfig diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 2d5c51a899..088c482988 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -39,6 +39,20 @@ def 
extract_needed_pr_label_data(labels: List[dict], repo_id: int, tool_source: def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + labels: List of dictionaries of label data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of parsed label dicts + """ if len(labels) == 0: return [] @@ -65,8 +79,21 @@ def extract_needed_mr_label_data(labels: List[dict], repo_id: int, tool_source: return label_dicts -# retrieve only the needed data for pr assignees from the api response def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr assignees from the api response + + Arguments: + assignees: List of dictionaries of asignee data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of parsed asignee dicts + """ if len(assignees) == 0: return [] @@ -89,6 +116,20 @@ def extract_needed_pr_assignee_data(assignees: List[dict], repo_id: int, tool_so return assignee_dicts def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for merge request assignees from the api response + + Arguments: + assignees: List of dictionaries of asignee data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + 
data_source: The source of the data + + + Returns: + List of parsed asignee dicts + """ if len(assignees) == 0: return [] @@ -112,8 +153,21 @@ def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: i -# retrieve only the needed data for pr reviewers from the api response def extract_needed_pr_reviewer_data(reviewers: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr reviewers from the api response + + Arguments: + reviewers: List of dictionaries of reviewer data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of parsed reviewer dicts + """ if len(reviewers) == 0: return [] @@ -299,6 +353,20 @@ def extract_needed_issue_assignee_data(assignees: List[dict], repo_id: int, tool return assignee_dicts def extract_needed_gitlab_issue_assignee_data(assignees: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for gitlab issue assignees from the api response + + Arguments: + assignees: List of dictionaries of gitlab assignee data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of parsed assignee dicts + """ if len(assignees) == 0: return [] @@ -351,6 +419,20 @@ def extract_needed_issue_label_data(labels: List[dict], repo_id: int, tool_sourc def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for gitlab issue labels from the api response + + Arguments: + labels: List of dictionaries of gitlab issue label 
data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of parsed label dicts + """ if len(labels) == 0: return [] @@ -376,8 +458,22 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too -# retrieve only the needed data for pr labels from the api response def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr labels from the api response + + Arguments: + message: Message data dict + issue_id: id of the issue + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict of message ref data. 
+ """ message_ref_dict = { 'issue_id': issue_id, @@ -409,7 +505,19 @@ def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo def extract_needed_pr_data(pr, repo_id, tool_source, tool_version): + """ + Retrieve only the needed data for the pr api response + + Arguments: + pr: PR data dict + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + + Returns: + Parsed pr dict + """ pr = { 'repo_id': repo_id, @@ -468,6 +576,20 @@ def extract_needed_pr_data(pr, repo_id, tool_source, tool_version): return pr def extract_needed_issue_data(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str): + """ + Retrieve only the needed data for the issue api response + + Arguments: + issue: Issue data dict + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: platform source + + + Returns: + Parsed issue dict + """ dict_data = { 'cntrb_id': None, # this the contributor who closed the issue @@ -612,6 +734,19 @@ def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, return review_row def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, tool_version): + """ + Retrieve only the needed data for the pr gitlab api response + + Arguments: + pr: PR data dict + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + + + Returns: + Parsed pr dict + """ pr_dict = { 'repo_id': repo_id, @@ -659,6 +794,20 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str): + """ + Retrieve only the needed data for 
the issue gitlab api response + + Arguments: + issue: Issue data dict + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: platform source + + + Returns: + Parsed issue dict + """ issue_dict = { "repo_id": repo_id, @@ -692,6 +841,22 @@ def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_ def extract_gitlab_mr_event_data(event: dict, pr_id: int, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: + """ + Retrieve only the needed data for the mr event gitlab api response + + Arguments: + event: Event data dict + pr_id: id of the pr + platform_id: id of the platform + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: platform source + + + Returns: + Parsed event dict + """ mr_event = { 'pull_request_id': pr_id, @@ -712,6 +877,22 @@ def extract_gitlab_mr_event_data(event: dict, pr_id: int, platform_id: int, repo return mr_event def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: + """ + Retrieve only the needed data for the issue event gitlab api response + + Arguments: + event: Event data dict + issue_id: id of the issue + platform_id: id of the platform + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: platform source + + + Returns: + Parsed event dict + """ issue_event = { "issue_event_src_id": event['target_id'], @@ -732,8 +913,21 @@ def extract_gitlab_issue_event_data(event: dict, issue_id: int, platform_id: int return issue_event -# retrieve only the needed data for pr reviewers from the api response -def 
extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr reviewers from the api response + + Arguments: + data: List of dictionaries that contain mr reviewer data to parse + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + List of extracted relevent data from needed mr reviwer data + """ if len(data) == 0: return [] @@ -741,7 +935,7 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: reviewer_dicts = [] for x in data: - for reviewer in x["suggested_approvers"]: + for _ in x["suggested_approvers"]: reviewer_dict = { 'pull_request_id': pull_request_id, @@ -757,6 +951,21 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, repo_id: def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr commit data from the api response + + Arguments: + commit: commit data dictionary + repo_id: augur id of the repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dictionary of the extracted commit data + """ commit = { 'pull_request_id': pull_request_id, @@ -773,7 +982,21 @@ def extract_needed_mr_commit_data(commit, repo_id, pull_request_id, tool_source, def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr file data from the 
api response + + Arguments: + gitlab_file_data: file data dictionary + repo_id: augur id of the repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + Returns: + List of dicts of parsed gitlab file changes + """ files = [] changes = gitlab_file_data["changes"] @@ -781,7 +1004,7 @@ def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool try: deletes = int(file_changes['diff'].split('@@')[1].strip().split(' ')[0].split(',')[1]) adds = int(file_changes['diff'].split('@@')[1].strip().split(' ')[1].split(',')[1]) - except: + except Exception: deletes = 0 adds = 0 @@ -802,7 +1025,21 @@ def extract_needed_mr_file_data(gitlab_file_data, repo_id, pull_request_id, tool def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, tool_version, data_source): + """ + Retrieve only the needed data for mr metadata from the api response + + Arguments: + mr_dict: mr data dictionary + repo_id: augur id of the repository + pull_request_id: id of the PR + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + Returns: + List of dicts of parsed mr metadata + """ head = {'sha': mr_dict['diff_refs']['head_sha'], 'ref': mr_dict['target_branch'], 'label': str(mr_dict['target_project_id']) + ':' + mr_dict['target_branch'], @@ -841,6 +1078,22 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Extract the message id for a given message on an issue from an api response + and connect it to the relevent repo id. 
+ + Arguments: + message: message data dict + issue_id: id of the issue + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict containing the message ref id as well as the repo id. + """ message_ref_dict = { 'issue_id': issue_id, @@ -855,7 +1108,22 @@ def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, r return message_ref_dict -def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str): +def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str): + """ + Extract specific metadata for a comment from an api response + and connect it to the relevent platform id. + + Arguments: + comment: comment data dict + platform_id: augur id of the relevant platform + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + + Returns: + Dict containing parsed comment text and metadata + """ comment_dict = { "pltfrm_id": platform_id, @@ -870,8 +1138,23 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: return comment_dict -# retrieve only the needed data for pr labels from the api response def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: + """ + Retrieve only the needed data for pr labels from the api response + + Arguments: + comment: comment data dict + pull_request_id: id of the PR + repo_id: augur id of the repository + platform_id: augur id of the relevant platform + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that 
processed the data + data_source: The source of the data + + + Returns: + Dict containing the comment, pr and repo id of the parsed comment data. + """ pr_msg_ref = { 'pull_request_id': pull_request_id, diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 7f0c7787ee..43b9c62942 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -238,7 +238,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db): } message_dicts.append( - extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source) ) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 5672a79895..5a183d0b0b 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -184,7 +184,7 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db): } message_dicts.append( - extract_needed_gitlab_message_data(message, platform_id, repo_id, tool_source, tool_version, data_source) + extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source) ) @@ -299,7 +299,7 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): pull_request_id = mr_number_to_id_map[id] - reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, repo_id, tool_source, tool_version, data_source) + reviewers = extract_needed_mr_reviewer_data(values, pull_request_id, tool_source, tool_version, data_source) all_reviewers += reviewers From b166fd6b466ad3ac94baf6eeb33de24c4db92c43 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 10:24:22 -0600 Subject: [PATCH 2/8] fix typos and add module doc-strings Signed-off-by: Isaac Milarsky --- augur/api/view/routes.py | 3 +++ augur/api/view/utils.py | 3 +++ 
augur/application/db/data_parse.py | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 0c47afbc2e..3a3fcccd03 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -1,3 +1,6 @@ +""" +Defines the api routes for the augur views +""" import logging import math from flask import Flask, render_template, render_template_string, request, abort, jsonify, redirect, url_for, session, flash diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index 043dd44831..298e9950ae 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -1,3 +1,6 @@ +""" +Defines utility functions used by the augur api views +""" from pathlib import Path from concurrent.futures import ThreadPoolExecutor from flask import render_template, flash, url_for, Flask diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 088c482988..7562181398 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -926,7 +926,7 @@ def extract_needed_mr_reviewer_data(data: List[dict], pull_request_id, tool_sour Returns: - List of extracted relevent data from needed mr reviwer data + List of extracted relevant data from needed mr reviwer data """ if len(data) == 0: @@ -1080,7 +1080,7 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: """ Extract the message id for a given message on an issue from an api response - and connect it to the relevent repo id. + and connect it to the relevant repo id. 
Arguments: message: message data dict @@ -1111,7 +1111,7 @@ def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, r def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str): """ Extract specific metadata for a comment from an api response - and connect it to the relevent platform id. + and connect it to the relevant platform id. Arguments: comment: comment data dict From 7f205db4869d2532d25dc1b05562477d961481db Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 11:25:51 -0600 Subject: [PATCH 3/8] add docs to pull_requests/tasks.py Signed-off-by: Isaac Milarsky --- augur/tasks/github/pull_requests/tasks.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 478260bcd7..8db394754c 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -76,7 +76,16 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None: def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): - + """ + Parse and insert all retrieved PR data. 
+ + Arguments: + pull_requests: List of paginated pr endpoint data + task_name: Name of the calling task and the repo + repo_id: augur id of the repository + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Pr Task" tool_version = "2.0" data_source = "Github API" From 9273baac4c842b1d9aa42ccd49d656329c326826 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 11:43:48 -0600 Subject: [PATCH 4/8] apply linting to events_task.py for gitlab methods Signed-off-by: Isaac Milarsky --- augur/tasks/github/util/util.py | 11 ++++++ augur/tasks/gitlab/events_task.py | 61 ++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/augur/tasks/github/util/util.py b/augur/tasks/github/util/util.py index 0400b82e1a..42989dcca3 100644 --- a/augur/tasks/github/util/util.py +++ b/augur/tasks/github/util/util.py @@ -58,6 +58,17 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic return json.loads(json.dumps(response.text)) def get_repo_weight_by_issue(logger,repo_git): + """ + Retrieve the sum of the number of issues and prs in a repository from a graphql query. 
+ + Arguments: + logger: logger object + repo_git: repository url + + Returns: + Sum of issues and prs for that repo + """ + from augur.tasks.github.util.gh_graphql_entities import GitHubRepo as GitHubRepoGraphql owner,name = get_owner_repo(repo_git) diff --git a/augur/tasks/gitlab/events_task.py b/augur/tasks/gitlab/events_task.py index 4224988b9f..8058831ba3 100644 --- a/augur/tasks/gitlab/events_task.py +++ b/augur/tasks/gitlab/events_task.py @@ -1,3 +1,6 @@ +""" +Module to define the task methods to collect gitlab event data for augur +""" import logging from augur.tasks.init.celery_app import celery_app as celery @@ -13,6 +16,12 @@ @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_issue_events(repo_git) -> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -36,6 +45,13 @@ def collect_gitlab_issue_events(repo_git) -> int: @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_merge_request_events(repo_git) -> int: + """ + Retrieve and parse gitlab mrs for the desired repo + + Arguments: + repo_git: the repo url string + """ + owner, repo = get_owner_repo(repo_git) @@ -57,13 +73,22 @@ def collect_gitlab_merge_request_events(repo_git) -> int: logger.info(f"{owner}/{repo} has no gitlab merge request events") -def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None: +def retrieve_all_gitlab_event_data(gtype, repo_git, logger, key_auth) -> None: + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + gtype: type of event data + repo_git: url of the relevant repo + logger: loggin object + key_auth: key auth cache and rotator object + """ owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting gitlab issue events for {owner}/{repo}") - url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={type}" + url = 
f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={gtype}" events = GitlabApiHandler(key_auth, logger) all_data = [] @@ -75,18 +100,28 @@ def retrieve_all_gitlab_event_data(type, repo_git, logger, key_auth) -> None: if len(page_data) == 0: logger.debug( - f"{owner}/{repo}: Gitlab {type} Events Page {page} contains no data...returning") - logger.info(f"{owner}/{repo}: {type} Events Page {page} of {num_pages}") + f"{owner}/{repo}: Gitlab {gtype} Events Page {page} contains no data...returning") + logger.info(f"{owner}/{repo}: {gtype} Events Page {page} of {num_pages}") return all_data - logger.info(f"{owner}/{repo}: Gitlab {type} Events Page {page} of {num_pages}") + logger.info(f"{owner}/{repo}: Gitlab {gtype} Events Page {page} of {num_pages}") all_data += page_data return all_data def process_issue_events(events, task_name, repo_id, logger, augur_db): - + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + events: List of dictionaries of issue event data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ + tool_source = "Gitlab issue events task" tool_version = "2.0" data_source = "Gitlab API" @@ -122,7 +157,21 @@ def process_issue_events(events, task_name, repo_id, logger, augur_db): def process_mr_events(events, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr events from the api response + + Arguments: + labels: List of dictionaries of label data + repo_id: augur id of the repository + tool_source: The part of augur that processed the data + tool_version: The version of the augur task that processed the data + data_source: The source of the data + + Returns: + List of parsed label dicts + """ + tool_source = "Gitlab mr events task" tool_version = "2.0" data_source = "Gitlab API" From 4cbfa15bcfac78bca9aac2dccae0275e589f9a5c Mon Sep 17 00:00:00 2001 From: 
Isaac Milarsky Date: Tue, 23 Jan 2024 11:47:27 -0600 Subject: [PATCH 5/8] unneeded elif Signed-off-by: Isaac Milarsky --- augur/api/view/routes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 3a3fcccd03..72164a9291 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -41,9 +41,9 @@ def root(path=""): def logo(brand=None): if brand is None: return redirect(url_for('static', filename='img/augur_logo.png')) - elif "augur" in brand: + if "augur" in brand: return logo(None) - elif "chaoss" in brand: + if "chaoss" in brand: return redirect(url_for('static', filename='img/Chaoss_Logo_white.png')) return "" From fc3d13b29c0f154f4711d6f29452d06a1633113c Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 12:22:54 -0600 Subject: [PATCH 6/8] add doc-strings to gitlab tasks and data structures Signed-off-by: Isaac Milarsky --- augur/tasks/gitlab/gitlab_api_handler.py | 17 ++- augur/tasks/gitlab/gitlab_api_key_handler.py | 13 ++- augur/tasks/gitlab/gitlab_random_key_auth.py | 4 +- augur/tasks/gitlab/gitlab_task_session.py | 14 ++- augur/tasks/gitlab/issues_task.py | 56 ++++++++- augur/tasks/gitlab/merge_request_task.py | 115 +++++++++++++++++++ 6 files changed, 209 insertions(+), 10 deletions(-) diff --git a/augur/tasks/gitlab/gitlab_api_handler.py b/augur/tasks/gitlab/gitlab_api_handler.py index 8f111d3c48..8735a8318b 100644 --- a/augur/tasks/gitlab/gitlab_api_handler.py +++ b/augur/tasks/gitlab/gitlab_api_handler.py @@ -1,4 +1,7 @@ - +""" +Defines a GitlabApiHandler class to paginate and handle interaction with GitLab's +api through automatic use of relevant key auth and pagination tools. 
+""" import httpx import time import logging @@ -61,7 +64,7 @@ def get_length(self, url): issue_len = len(issues) """ - num_pages = self.get_num_pages() + num_pages = self.get_num_pages(url) self.logger.info(f"Num pages: {num_pages}") @@ -255,6 +258,16 @@ def get_num_pages(self, url) -> Optional[int]: return num_pages def hit_api(self, url, timeout, method): + """Attempt to retrieve data at given url. + + Args: + url: The url to retrieve the data from + timeout: time to wait until timeout + method: GET, POST, etc. + + Returns + The response object from hitting the url and the data on the page + """ return hit_api(self.key_manager, url, self.logger, timeout, method=method) diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index 50efa446f8..20bc1219ca 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -1,3 +1,8 @@ +""" +Defines the handler logic needed to effectively fetch GitLab auth keys +from either the redis cache or the database. Follows the same patterns as +the github api key handler. 
+""" import httpx import time import random @@ -11,7 +16,7 @@ class NoValidKeysError(Exception): - pass + """Defines an exception that is thrown when no gitlab keys are valid""" class GitlabApiKeyHandler(): @@ -102,7 +107,9 @@ def get_api_keys(self) -> List[str]: try: keys = self.get_api_keys_from_database() break - except: + except Exception as e: + self.logger.error(f"Ran into issue when fetching key from database:\n {e}\n") + self.logger.error("Sleeping for 5 seconds...") time.sleep(5) attempts += 1 @@ -135,7 +142,7 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order - valid_now = valid_keys + #valid_now = valid_keys #try: #self.logger.info(f'valid keys before shuffle: {valid_keys}') #valid_keys = random.sample(valid_keys, len(valid_keys)) diff --git a/augur/tasks/gitlab/gitlab_random_key_auth.py b/augur/tasks/gitlab/gitlab_random_key_auth.py index 86ad64b056..64ba31dd19 100644 --- a/augur/tasks/gitlab/gitlab_random_key_auth.py +++ b/augur/tasks/gitlab/gitlab_random_key_auth.py @@ -6,8 +6,8 @@ class GitlabRandomKeyAuth(RandomKeyAuth): - """Defines a github specific RandomKeyAuth class so - github collections can have a class randomly selects an api key for each request + """Defines a gitlab specific RandomKeyAuth class so + gitlab collections can have a class randomly selects an api key for each request """ def __init__(self, session: DatabaseSession, logger): diff --git a/augur/tasks/gitlab/gitlab_task_session.py b/augur/tasks/gitlab/gitlab_task_session.py index 1871e46c50..58a6e64373 100644 --- a/augur/tasks/gitlab/gitlab_task_session.py +++ b/augur/tasks/gitlab/gitlab_task_session.py @@ -1,14 +1,26 @@ +""" +Defines a GitLab-specific session and manifest object for use in GitLab tasks +""" from logging import Logger from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth from augur.application.db.session import DatabaseSession class GitlabTaskManifest: + """ + Manifest object 
that represents the state and common elements of + the specified task. GitLab version for the GitLab tasks. + + Attributes: + augur_db: sqlalchemy db object + key_auth: GitLab specific key auth retrieval collection + logger: logging object + platform_id: GitLab specific platform id (github is 1) + """ def __init__(self, logger): from augur.tasks.init.celery_app import engine - from augur.application.db.session import DatabaseSession self.augur_db = DatabaseSession(logger, engine) self.key_auth = GitlabRandomKeyAuth(self.augur_db.session, logger) diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py index 43b9c62942..cf6e5e5dab 100644 --- a/augur/tasks/gitlab/issues_task.py +++ b/augur/tasks/gitlab/issues_task.py @@ -1,3 +1,6 @@ +""" +Defines the set of tasks used to retrieve GitLab issue data. +""" import logging import traceback @@ -14,7 +17,12 @@ @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_issues(repo_git : str) -> int: + """ + Retrieve and parse gitlab issues for the desired repo + Arguments: + repo_git: the repo url string + """ logger = logging.getLogger(collect_gitlab_issues.__name__) with GitlabTaskManifest(logger) as manifest: @@ -45,6 +53,14 @@ def collect_gitlab_issues(repo_git : str) -> int: def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None: + """ + Retrieve only the needed data for issues from the api response + + Arguments: + repo_git: url of the relevant repo + logger: loggin object + key_auth: key auth cache and rotator object + """ owner, repo = get_owner_repo(repo_git) @@ -73,7 +89,17 @@ def retrieve_all_gitlab_issue_data(repo_git, logger, key_auth) -> None: return all_data def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: - + """ + Retrieve only the needed data for issues from the api response + + Arguments: + issues: List of dictionaries of issue data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + 
logger: logging object + augur_db: sqlalchemy db object + """ + # get repo_id or have it passed tool_source = "Gitlab Issue Task" tool_version = "2.0" @@ -153,6 +179,13 @@ def process_issues(issues, task_name, repo_id, logger, augur_db) -> None: @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_issue_comments(issue_ids, repo_git) -> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + issue_ids: Set of issue ids to collect comments for + repo_git: repo url + """ owner, repo = get_owner_repo(repo_git) @@ -175,6 +208,15 @@ def collect_gitlab_issue_comments(issue_ids, repo_git) -> int: def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): + """ + Retrieve only the needed data for issue comments + + Arguments: + key_auth: key auth cache and rotator object + logger: logging object + issue_ids: ids of issues to find comments for + repo_git: repo url + """ owner, repo = get_owner_repo(repo_git) @@ -186,7 +228,7 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): for id in issue_ids: - print(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") + logger.info(f"Collecting {owner}/{repo} gitlab issue comments for issue {index} of {issue_count}") url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/issues/{id}/notes" @@ -206,6 +248,16 @@ def retrieve_all_gitlab_issue_comments(key_auth, logger, issue_ids, repo_git): def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for issue messages from the api response + + Arguments: + data: List of dictionaries of issue event data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Gitlab issue comments" tool_version = "2.0" diff --git a/augur/tasks/gitlab/merge_request_task.py
b/augur/tasks/gitlab/merge_request_task.py index 5a183d0b0b..a04f055a27 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -13,6 +13,12 @@ @celery.task(base=AugurCoreRepoCollectionTask) def collect_gitlab_merge_requests(repo_git: str) -> int: + """ + Retrieve and parse gitlab MRs for the desired repo + + Arguments: + repo_git: the repo url string + """ logger = logging.getLogger(collect_gitlab_merge_requests.__name__) @@ -37,6 +43,14 @@ def collect_gitlab_merge_requests(repo_git: str) -> int: def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: + """ + Retrieve only the needed data for MRs from the api response + + Arguments: + repo_git: url of the relevant repo + logger: logging object + key_auth: key auth cache and rotator object + """ owner, repo = get_owner_repo(repo_git) @@ -66,6 +80,19 @@ def retrieve_all_mr_data(repo_git: str, logger, key_auth) -> None: def process_merge_requests(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + data: collection of mr data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + + Returns: + List of parsed MR ids.
+ """ tool_source = "Mr Task" tool_version = "2.0" @@ -129,6 +156,13 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_comments(mr_ids, repo_git) -> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + mr_ids: ids of MRs to paginate comments for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -152,6 +186,16 @@ def collect_merge_request_comments(mr_ids, repo_git) -> int: def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + data: List of dictionaries of mr message data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Gitlab mr comments" tool_version = "2.0" @@ -214,6 +258,13 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_metadata(mr_ids, repo_git) -> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + mr_ids: list of mr ids to find metadata for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -236,6 +287,16 @@ def collect_merge_request_metadata(mr_ids, repo_git) -> int: logger.info(f"{owner}/{repo} has no gitlab merge request metadata") def process_mr_metadata(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr label data from the api response + + Arguments: + data: List of dictionaries of mr metadata + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Mr Metadata Task" tool_version = "2.0" @@ -261,6 +322,13 @@ def process_mr_metadata(data, 
task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_reviewers(mr_ids, repo_git) -> int: + """ + Retrieve and parse mr reviewers for the desired repo + + Arguments: + mr_ids: mrs to search for reviewers for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -283,6 +351,16 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr reviewer data from the api response + + Arguments: + data: List of dictionaries of mr reviewer data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Mr Reviewr Task" tool_version = "2.0" @@ -311,6 +389,13 @@ def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_commits(mr_ids, repo_git) -> int: + """ + Retrieve and parse mr commits for the desired repo + + Arguments: + mr_ids: ids of mrs to get commits for + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -334,6 +419,16 @@ def collect_merge_request_commits(mr_ids, repo_git) -> int: def process_mr_commits(data, task_name, repo_id, logger, augur_db): + """ + Retrieve only the needed data for mr commits from the api response + + Arguments: + data: List of dictionaries of mr commit data + task_name: name of the task as well as the repo being processed + repo_id: augur id of the repo + logger: logging object + augur_db: sqlalchemy db object + """ tool_source = "Mr Commit Task" tool_version = "2.0" @@ -363,6 +458,13 @@ def process_mr_commits(data, task_name, repo_id, logger, augur_db): @celery.task(base=AugurCoreRepoCollectionTask) def collect_merge_request_files(mr_ids, repo_git) 
-> int: + """ + Retrieve and parse gitlab events for the desired repo + + Arguments: + mr_ids: the ids of mrs to get files for. + repo_git: the repo url string + """ owner, repo = get_owner_repo(repo_git) @@ -409,6 +511,19 @@ def process_mr_files(data, task_name, repo_id, logger, augur_db): def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, response_type): + """ + Retrieve specific mr data from the GitLab api. + + Arguments: + ids: mr ids to paginate info for + url: endpoint to paginate or hit + name: name of data to collect + owner: owner of the repo + repo: repo name + key_auth: key auth cache and rotator object + logger: logging object + response_type: type of data to get from the api + """ all_data = {} mr_count = len(ids) From 579dec0d90cb8e38cd72fd6b2f02dc4659bdd624 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 12:26:38 -0600 Subject: [PATCH 7/8] remove unused variable Signed-off-by: Isaac Milarsky --- augur/tasks/gitlab/merge_request_task.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index a04f055a27..60d2ececda 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -346,17 +346,16 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db) + process_mr_reviewers(reviewers, repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") -def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): +def process_mr_reviewers(data, repo_id, logger, augur_db): """ Retrieve only the needed data for mr reviewer data from the api response Arguments: data: List of dictionaries of mr reviewer data - task_name: name of the task as
well as the repo being processed repo_id: augur id of the repo logger: logging object augur_db: sqlalchemy db object From 59afa4371f7199155d681b499493ac104bb4f90c Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 23 Jan 2024 12:28:22 -0600 Subject: [PATCH 8/8] use vars Signed-off-by: Isaac Milarsky --- augur/tasks/gitlab/merge_request_task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py index 60d2ececda..a7305acd46 100644 --- a/augur/tasks/gitlab/merge_request_task.py +++ b/augur/tasks/gitlab/merge_request_task.py @@ -346,11 +346,11 @@ def collect_merge_request_reviewers(mr_ids, repo_git) -> int: if reviewers: logger.info(f"Length of merge request reviewers: {len(reviewers)}") - process_mr_reviewers(reviewers, repo_id, logger, augur_db) + process_mr_reviewers(reviewers, f"{owner}/{repo}: Mr reviewer task", repo_id, logger, augur_db) else: logger.info(f"{owner}/{repo} has no gitlab merge request reviewers") -def process_mr_reviewers(data, repo_id, logger, augur_db): +def process_mr_reviewers(data, task_name, repo_id, logger, augur_db): """ Retrieve only the needed data for mr reviewer data from the api response @@ -365,6 +365,8 @@ def process_mr_reviewers(data, repo_id, logger, augur_db): tool_version = "2.0" data_source = "Gitlab API" + logger.info(f"Running {task_name}...") + # create mapping from mr number to pull request id of current mrs mr_number_to_id_map = {} mrs = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).all()