From 48fea60ed50def1dd09d99a8d7c9b072055a9b2f Mon Sep 17 00:00:00 2001
From: Simran
Date: Fri, 20 Sep 2024 21:31:16 +0530
Subject: [PATCH] Changes to gitlab manager fixing pagination

---
 Dockerfile        |  19 +++++++-
 gitlab_manager.py | 107 ++++++++++++++++++++++++++++++++++------------
 repo_cloner.py    |   7 +++-
 3 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 503d0eb..f60f678 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,9 +2,26 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
+# Accept proxy settings as build arguments
+ARG HTTP_PROXY
+ARG HTTPS_PROXY
+ARG NO_PROXY
+
+# Set the proxy environment variables
+ENV http_proxy=${HTTP_PROXY}
+ENV https_proxy=${HTTPS_PROXY}
+ENV no_proxy=${NO_PROXY}
+
 # Install git and clean up to reduce image size
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends git openssh-client && \
+    apt-get install -y --no-install-recommends \
+    git \
+    openssh-client \
+    curl \
+    inetutils-ping \
+    net-tools \
+    iproute2 \
+    dnsutils && \
     rm -rf /var/lib/apt/lists/*
 
 # Copy only necessary files
diff --git a/gitlab_manager.py b/gitlab_manager.py
index 892bb6c..0ebe3cf 100644
--- a/gitlab_manager.py
+++ b/gitlab_manager.py
@@ -1,49 +1,100 @@
 import requests
+import re
 from logger import log_info, log_error, log_debug, log_warning, log_critical, log_exception
-from urllib.parse import quote
 from config import GITLAB_TOKEN, GITLAB_API_URL
 
 
 class GitLabManager:
     def __init__(self, projects):
-        self.projects = [proj.strip() for proj in projects if proj.strip()]
+        self.allowed_projects = [project.strip() for project in projects if project.strip()]
         self.headers = {'PRIVATE-TOKEN': GITLAB_TOKEN}
 
     def get_repositories(self):
         repos = []
-        for project in self.projects:
-            encoded_project = quote(project, safe='')
-            page = 1
-            while True:
-                url = f"{GITLAB_API_URL}/groups/{encoded_project}/projects"
-                params = {'per_page': 100, 'page': page, 'include_subgroups': True}
-                try:
-                    response = requests.get(url, headers=self.headers, params=params, timeout=10, verify=False)
-                    if response.status_code != 200:
-                        log_error(f"Failed to fetch repos for project {project}: {response.status_code}")
-                        break
-                    data = response.json()
-                    if not data:
-                        break
-                    for repo in data:
-                        if not repo['archived']:
+        url = f"{GITLAB_API_URL}/projects"
+        # Keyset pagination: the server returns the next page as a full URL in the 'Link' header.
+        params = {
+            'pagination': 'keyset',
+            'per_page': 100,
+            'order_by': 'id',
+            'sort': 'asc'
+        }
+
+        while True:
+            try:
+                # 'params' is only set for the first request; next-page URLs already carry the query string.
+                response = requests.get(url, headers=self.headers, params=params,
+                                        timeout=40, verify=False)
+                if response.status_code != 200:
+                    log_error(f"Failed to fetch repos for gitlab projects: {response.status_code}")
+                    break
+
+                # Parse the response data
+                data = response.json()
+                if not data:
+                    break
+
+                # Process the repositories
+                for repo in data:
+                    if not repo['archived']:
+                        if self._is_allowed(repo['path_with_namespace']):
                             repos.append({
                                 'name': repo['name'],
                                 'ssh_url_to_repo': repo['ssh_url_to_repo'],
                                 'http_url_to_repo': repo['http_url_to_repo'],
                                 'default_branch': repo['default_branch'],
-                                'name_with_namespace': repo['name_with_namespace']
+                                'name_with_namespace': repo['name_with_namespace'],
+                                'path_with_namespace': repo['path_with_namespace']
                             })
-                    page += 1
-                except requests.exceptions.Timeout:
-                    log_error(f"Request timed out while fetching repositories for project {project}")
-                    break
+                        else:
+                            log_info(
+                                f"Skipping repository {repo['path_with_namespace']} as it does not fall under the allowed projects.")
 
-                except requests.exceptions.SSLError as ssl_error:
-                    log_error(f"SSL error occurred while fetching repositories for project {project}: {ssl_error}")
-                    break
+                # Extract the next page URL from the 'Link' header
+                link_header = response.headers.get('Link', None)
+                if link_header and 'rel="next"' in link_header:
+                    url = self.extract_next_page_url(link_header)
+                    if not url:
+                        log_error("Failed to extract the next page URL, stopping pagination.")
+                        break
 
-                except requests.exceptions.RequestException as e:
-                    log_exception(f"An error occurred while fetching repositories for project {project}: {e}")
+                    # From this point on, don't use 'params' anymore
+                    params = None
+                else:
                     break
+            except requests.exceptions.Timeout:
+                log_error("Request timed out while fetching repositories for gitlab projects")
+                break
+
+            except requests.exceptions.SSLError as ssl_error:
+                log_error(f"SSL error occurred while fetching repositories for gitlab project: {ssl_error}")
+                break
+
+            except requests.exceptions.RequestException as e:
+                log_exception(f"An error occurred while fetching repositories for gitlab project: {e}")
+                break
+
         return repos
+
+    def extract_next_page_url(self, link_header):
+        """
+        Extracts the URL for the next page from the 'Link' header.
+        Returns None (after logging an error) when the header has no rel="next" entry.
+        """
+        # The 'Link' header contains URLs with rel="next", rel="prev", etc.
+        match = re.search(r'<([^>]+)>;\s*rel="next"', link_header or '')
+        if match:
+            return match.group(1)  # Return the next page URL
+        log_error("Next page URL not found in Link header.")
+        return None
+
+    def _is_allowed(self, path_with_namespace):
+        """
+        Returns True if the repository path equals an allowed project or lies beneath it.
+        Matching is done on whole path segments, so 'team' does not match 'team-old/repo'.
+        """
+        for allowed_project in self.allowed_projects:
+            prefix = allowed_project.strip('/')
+            if path_with_namespace == prefix or path_with_namespace.startswith(prefix + '/'):
+                return True
+        return False
diff --git a/repo_cloner.py b/repo_cloner.py
index 6f341c7..00ce48b 100644
--- a/repo_cloner.py
+++ b/repo_cloner.py
@@ -34,10 +34,15 @@ def clone_or_update_repo(self, repo_info, base_dir):
                 log_error(f"Failed to update {repo_name}: {e}")
 
     def clone_gitlab_repo(self, repo_info, base_dir):
-        path_parts = repo_info['name_with_namespace'].split(' / ')
+        # Extract the namespace path from the repository info
+        repo_namespace_path = repo_info['path_with_namespace']
+
+        # Proceed with cloning
+        path_parts = repo_namespace_path.split('/')
         repo_name = path_parts[-1]
         repo_path = os.path.join(base_dir, *path_parts[1:-1], repo_name)
         clone_url = repo_info['ssh_url_to_repo'] if CLONE_METHOD == 'ssh' else repo_info['http_url_to_repo']
+
         if not os.path.exists(repo_path):
             os.makedirs(os.path.dirname(repo_path), exist_ok=True)
             try: