Commit

Changes to gitlab manager fixing pagination

Simran committed Sep 20, 2024
1 parent 4e83579 commit 48fea60
Showing 3 changed files with 99 additions and 30 deletions.
19 changes: 18 additions & 1 deletion Dockerfile
@@ -2,9 +2,26 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
+# Accept proxy settings as build arguments
+ARG HTTP_PROXY
+ARG HTTPS_PROXY
+ARG NO_PROXY
+
+# Set the proxy environment variables
+ENV http_proxy=${HTTP_PROXY}
+ENV https_proxy=${HTTPS_PROXY}
+ENV no_proxy=${NO_PROXY}
+
 # Install git and clean up to reduce image size
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends git openssh-client && \
+    apt-get install -y --no-install-recommends \
+        git \
+        openssh-client \
+        curl \
+        inetutils-ping \
+        net-tools \
+        iproute2 \
+        dnsutils && \
     rm -rf /var/lib/apt/lists/*
 
 # Copy only necessary files
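Note: the proxy values baked in by the ENV lines are picked up automatically at runtime by requests, which gitlab_manager.py uses for all API calls, since requests resolves proxies from the http_proxy/https_proxy/no_proxy environment variables. A minimal sketch of that lookup, with a made-up proxy value standing in for whatever the build args provide:

import os
from urllib.request import getproxies

# Hypothetical value standing in for the HTTPS_PROXY build arg.
os.environ["https_proxy"] = "http://proxy.example.com:3128"

# requests consults this same environment-derived mapping on every call.
print(getproxies())  # e.g. {'https': 'http://proxy.example.com:3128'}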
102 changes: 74 additions & 28 deletions gitlab_manager.py
@@ -1,49 +1,95 @@
 import requests
+import re
 from logger import log_info, log_error, log_debug, log_warning, log_critical, log_exception
 from urllib.parse import quote
 from config import GITLAB_TOKEN, GITLAB_API_URL
 
 class GitLabManager:
     def __init__(self, projects):
-        self.projects = [proj.strip() for proj in projects if proj.strip()]
+        self.allowed_projects = [project.strip() for project in projects if project.strip()]
         self.headers = {'PRIVATE-TOKEN': GITLAB_TOKEN}
 
     def get_repositories(self):
         repos = []
-        for project in self.projects:
-            encoded_project = quote(project, safe='')
-            page = 1
-            while True:
-                url = f"{GITLAB_API_URL}/groups/{encoded_project}/projects"
-                params = {'per_page': 100, 'page': page, 'include_subgroups': True}
-                try:
-                    response = requests.get(url, headers=self.headers, params=params, timeout=10, verify=False)
-                    if response.status_code != 200:
-                        log_error(f"Failed to fetch repos for project {project}: {response.status_code}")
-                        break
-                    data = response.json()
-                    if not data:
-                        break
-                    for repo in data:
-                        if not repo['archived']:
+        url = f"{GITLAB_API_URL}/projects"
+        params = {
+            'pagination': 'keyset',
+            'per_page': 100,
+            'order_by': 'id',
+            'sort': 'asc',
+            'include_subgroups': True
+        }
+
+        while True:
+            try:
+                # Send the request. Use params only for the first request, then use 'url' directly if it's updated.
+                response = requests.get(url, headers=self.headers, params=params if 'params' in locals() else None,
+                                        timeout=40, verify=False)
+                if response.status_code != 200:
+                    log_error(f"Failed to fetch repos for gitlab projects: {response.status_code}")
+                    break
+
+                # Parse the response data
+                data = response.json()
+                if not data:
+                    break
+
+                # Process the repositories
+                for repo in data:
+                    if not repo['archived']:
+                        if any(repo['path_with_namespace'].startswith(allowed_project) for allowed_project in
+                               self.allowed_projects):
                             repos.append({
                                 'name': repo['name'],
                                 'ssh_url_to_repo': repo['ssh_url_to_repo'],
                                 'http_url_to_repo': repo['http_url_to_repo'],
                                 'default_branch': repo['default_branch'],
-                                'name_with_namespace': repo['name_with_namespace']
+                                'name_with_namespace': repo['name_with_namespace'],
+                                'path_with_namespace': repo['path_with_namespace']
                             })
-                    page += 1
-                except requests.exceptions.Timeout:
-                    log_error(f"Request timed out while fetching repositories for project {project}")
-                    break
-                except requests.exceptions.SSLError as ssl_error:
-                    log_error(f"SSL error occurred while fetching repositories for project {project}: {ssl_error}")
-                    break
-                except requests.exceptions.RequestException as e:
-                    log_exception(f"An error occurred while fetching repositories for project {project}: {e}")
-                    break
+                        else:
+                            log_info(
+                                f"Skipping repository {repo['path_with_namespace']} as it does not fall under the allowed projects.")
+
+                # Extract the next page URL from the 'Link' header
+                link_header = response.headers.get('Link', None)
+                if link_header and 'rel="next"' in link_header:
+                    url = self.extract_next_page_url(link_header)
+                    if not url:
+                        log_error("Failed to extract the next page URL, stopping pagination.")
+                        break
+
+                    # From this point on, don't use 'params' anymore
+                    params = None
+                else:
+                    break
+
+            except requests.exceptions.Timeout:
+                log_error(f"Request timed out while fetching repositories for gitlab projects")
+                break
+
+            except requests.exceptions.SSLError as ssl_error:
+                log_error(f"SSL error occurred while fetching repositories for gitlab project: {ssl_error}")
+                break
+
+            except requests.exceptions.RequestException as e:
+                log_exception(f"An error occurred while fetching repositories for gitlab project: {e}")
+                break
 
         return repos
+
+    def extract_next_page_url(self, link_header):
+        """
+        Extracts the URL for the next page from the 'Link' header.
+        """
+        try:
+            # The 'Link' header contains URLs with rel="next", rel="prev", etc.
+            match = re.search(r'<([^>]+)>;\s*rel="next"', link_header)
+            if match:
+                return match.group(1)  # Return the next page URL
+            else:
+                log_error("Next page URL not found in Link header.")
+                return None
+        except Exception as e:
+            log_error(f"Failed to extract next page URL from Link header: {e}")
+            return None
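For reference, keyset pagination ('pagination': 'keyset') means GitLab returns the cursor for the next page only in the Link response header, which is why the loop above follows extract_next_page_url instead of incrementing a page counter. A standalone sketch of that parsing against an illustrative header (the host and id_after value are made up):

import re

# Illustrative Link header in GitLab's keyset-pagination format.
link_header = ('<https://gitlab.example.com/api/v4/projects?pagination=keyset'
               '&per_page=100&order_by=id&sort=asc&id_after=4200>; rel="next"')

match = re.search(r'<([^>]+)>;\s*rel="next"', link_header)
if match:
    # The follow-up request goes straight to this URL with params=None,
    # mirroring how get_repositories() continues its loop.
    print(match.group(1))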

8 changes: 7 additions & 1 deletion repo_cloner.py
Original file line number	Diff line number	Diff line change
@@ -34,10 +34,15 @@ def clone_or_update_repo(self, repo_info, base_dir):
             log_error(f"Failed to update {repo_name}: {e}")
 
     def clone_gitlab_repo(self, repo_info, base_dir):
-        path_parts = repo_info['name_with_namespace'].split(' / ')
+        # Extract the namespace path from the repository info
+        repo_namespace_path = repo_info['path_with_namespace']
+
+        # Proceed with cloning
+        path_parts = repo_namespace_path.split('/')
         repo_name = path_parts[-1]
         repo_path = os.path.join(base_dir, *path_parts[1:-1], repo_name)
         clone_url = repo_info['ssh_url_to_repo'] if CLONE_METHOD == 'ssh' else repo_info['http_url_to_repo']
+
         if not os.path.exists(repo_path):
             os.makedirs(os.path.dirname(repo_path), exist_ok=True)
             try:
@@ -59,3 +64,4 @@ def clone_gitlab_repo(self, repo_info, base_dir):
                 self.clone_gitlab_repo(repo_info, base_dir)
             except GitCommandError as e:
                 log_error(f"Failed to update {repo_name}: {e}")
+
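One side effect worth noting: path_with_namespace carries URL-safe path segments ('group/subgroup/repo') rather than the display names in name_with_namespace ('Group / Subgroup / Repo'), and path_parts[1:-1] still drops the top-level group from the on-disk layout. A quick illustration with a made-up namespace and base directory:

import os

# Made-up values; 'path_with_namespace' is what the GitLab API returns per project.
repo_namespace_path = "platform/tools/repo-sync"
base_dir = "/data/repos"

path_parts = repo_namespace_path.split('/')  # ['platform', 'tools', 'repo-sync']
repo_name = path_parts[-1]

# Same join as clone_gitlab_repo(): the leading group segment is dropped.
repo_path = os.path.join(base_dir, *path_parts[1:-1], repo_name)
print(repo_path)  # /data/repos/tools/repo-sync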
