Skip to content

Commit

Permalink
Merge pull request #96 from mwestphall/SOFTWARE-5950-rsync-tarballs
Browse files Browse the repository at this point in the history
SOFTWARE-5950: Replace lftp bash script for syncing tarball-install with python script
  • Loading branch information
matyasselmeci authored Nov 20, 2024
2 parents 411cfa0 + f344045 commit ee60e4c
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 47 deletions.
46 changes: 0 additions & 46 deletions bin/update_tarball-install.sh

This file was deleted.

22 changes: 22 additions & 0 deletions distrepos/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from distrepos.mirror_run import update_mirrors_for_tag
from distrepos.link_static import link_static_data
from distrepos.util import lock_context, check_rsync, log_ml, run_with_log
from distrepos.tarball_sync import update_tarball_dirs

from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -224,6 +225,24 @@ def link_static(options: Options) -> int:
_log.exception(f"Unexpected error updating static-data symlinks: {e}")
return ERR_FAILURES


def sync_tarballs(options: Options) -> int:
    """
    Sync client tarballs from an upstream rsync server to repo.

    Calls update_tarball_dirs() and converts its (ok, err) result into an
    exit code.

    :param options: the global distrepos options, passed through unchanged
    :return: 0 on success; ERR_FAILURES if the sync reported an error or an
        unexpected exception was raised
    """
    _log.info("Syncing tarball clients")
    try:
        ok, err = update_tarball_dirs(options)
    except Exception as e:
        # Boundary for this action: record the traceback and report failure
        # instead of letting the exception propagate to the caller.
        _log.exception(f"Unexpected error syncing tarball clients: {e}")
        return ERR_FAILURES

    if not ok:
        _log.warning(f"Unable to sync tarball clients: {err}")
        return ERR_FAILURES

    _log.info("tarball clients updated successfully")
    return 0

#
# Main function
#
Expand Down Expand Up @@ -281,6 +300,9 @@ def main(argv: t.Optional[t.List[str]] = None) -> int:
if ActionType.LINK_STATIC in args.action and not result:
result = link_static(options)

if ActionType.TARBALL_SYNC in args.action and not result:
result = sync_tarballs(options)

# If all actions were successful, update the repo timestamp
if not result:
update_repo_timestamp(options)
Expand Down
8 changes: 8 additions & 0 deletions distrepos/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@
DEFAULT_CONFIG = "/etc/distrepos.conf"
DEFAULT_DESTROOT = "/data/repo"
DEFAULT_KOJI_RSYNC = "rsync://kojihub2000.chtc.wisc.edu/repos-dist"
DEFAULT_TARBALL_RSYNC = "rsync://rsync.cs.wisc.edu/vdt/"
DEFAULT_LOCK_DIR = "/var/lock/rsync_dist_repo"

DEFAULT_TARBALL_INSTALL_DIR = 'tarball-install'

# These options are required to be present _and_ nonempty. Some of them may
# come from the DEFAULT section.

Expand Down Expand Up @@ -76,18 +79,21 @@ class Options(t.NamedTuple):
static_root: t.Optional[Path]
koji_rsync: str
condor_rsync: str
tarball_rsync: str
lock_dir: t.Optional[Path]
mirror_root: t.Optional[Path]
mirror_working_root: t.Optional[Path]
mirror_prev_root: t.Optional[Path]
mirror_hosts: t.List[str]
tarball_install: str


class ActionType(str, Enum):
    """
    The actions distrepos can be asked to perform.

    Each member's value is the string form of the action; main() checks
    which members are present in the parsed ``action`` argument to decide
    which steps to run.
    """
    RSYNC = "rsync"
    CADIST = "cadist"
    MIRROR = "mirror"
    LINK_STATIC = "link_static"
    TARBALL_SYNC = "tarball_sync"


def format_tag(
Expand Down Expand Up @@ -387,11 +393,13 @@ def get_options(args: Namespace, config: ConfigParser) -> Options:
static_root=Path(static_root) if static_root else None,
condor_rsync=options_section.get("condor_rsync", DEFAULT_CONDOR_RSYNC),
koji_rsync=options_section.get("koji_rsync", DEFAULT_KOJI_RSYNC),
tarball_rsync=options_section.get("tarball_rsync", DEFAULT_TARBALL_RSYNC),
lock_dir=Path(args.lock_dir) if args.lock_dir else None,
mirror_root=mirror_root,
mirror_working_root=mirror_working_root,
mirror_prev_root=mirror_prev_root,
mirror_hosts=mirror_hosts,
tarball_install=options_section.get("tarball_install", DEFAULT_TARBALL_INSTALL_DIR)
)
return options

Expand Down
130 changes: 130 additions & 0 deletions distrepos/tarball_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@

import logging
from pathlib import Path
from distrepos.error import DiskFullError, TagFailure
from distrepos.params import Options, Tag
from distrepos.tag_run import update_release_repos
from distrepos.util import log_rsync, rsync_disk_is_full, rsync_with_link
from typing import Tuple, List

_log = logging.getLogger(__name__)

def tarball_sync(options: Options) -> Tuple[bool, str]:
    """
    Run rsync_with_link() against the configured tarball rsync endpoint.

    The working and destination tarball directories (each
    ``<root>/<tarball_install>``) are handed to rsync_with_link();
    presumably the former receives the files and the latter is the
    hard-link reference -- confirm against distrepos.util.

    Returns (True, "") when rsync succeeds and (False, <message>) when it
    fails.  Raises DiskFullError if rsync_disk_is_full() is true for the
    failed run.
    """
    _log.debug("tarball_sync")

    install_subdir = options.tarball_install
    staging_dir = Path(options.working_root) / install_subdir
    published_dir = Path(options.dest_root) / install_subdir

    description = "rsync from tarball repo"
    ok, proc = rsync_with_link(options.tarball_rsync, staging_dir, published_dir)
    log_rsync(proc, description)

    if not ok:
        # A full disk is escalated as an exception; any other failure is
        # reported through the return value.
        if rsync_disk_is_full(proc):
            raise DiskFullError(description)
        return False, f"Error pulling tarball clients: {proc.stderr}"

    _log.info("%s ok", description)
    return True, ""

class TarballInfo():
    """
    Fields parsed from a tarball file name.

    The name is split on '.'; when it has at least five parts, the three
    parts preceding the final two (e.g. ``tar`` and ``gz``) are taken as
    ``date_string``, ``os`` and ``arch`` in that order.  Names with fewer
    parts leave those fields as empty strings, which is_valid() reports
    as invalid.
    """
    # Path the info was parsed from
    full_path: Path
    # Defaults keep the attributes defined even when parsing fails, so
    # is_valid() can be called on any instance without AttributeError.
    date_string: str = ""
    os: str = ""
    arch: str = ""

    def __init__(self, tarball_path: Path):
        self.full_path = tarball_path
        name_parts = tarball_path.name.split('.')
        if len(name_parts) >= 5:
            # Parts -5, -4 and -3, i.e. everything before the last two.
            self.date_string, self.os, self.arch = name_parts[-5:-2]

    def is_valid(self) -> bool:
        """ Check whether values for all fields were parsed from the file name """
        return bool(self.date_string and self.os and self.arch)

# File-name prefix of the "latest" symlinks built by create_latest_symlinks();
# the full link name is "<prefix>.<os>.<arch>.tar.gz".
WN_TARBALL_NAME_PREFIX = "osg-wn-client-latest"

def create_latest_symlinks(options: Options) -> Tuple[bool, str]:
    """
    For each tarball client directory synced via rsync, create a "latest"
    symlink for each synced arch and el version.

    Walks ``<working_root>/<tarball_install>/<series>/<arch>/`` and, for
    every OS found among the parsable tarball names in an arch directory,
    points ``<series>/<WN_TARBALL_NAME_PREFIX>.<os>.<arch>.tar.gz`` at the
    tarball with the greatest ``date_string`` (compared as strings).  The
    link target is relative to the series directory.

    Returns (True, "") on success, or (False, <message>) if an arch
    directory contains tarballs for more than one arch.
    """
    # TODO this assumes a number of things about the structure of downloaded tarball directories
    # We probably want to parameterize this somewhere
    # Assuming tarballs are rsynced into <dest_root>/<tarball_install>/<series>/<arch>/<name>.<os>.<arch>.tar.gz
    # Create a "latest" symlink in each <dest_root>/<tarball_install>/<series> for each <os>, <arch> combination

    working_dir = Path(options.working_root) / options.tarball_install

    for series_dir in working_dir.iterdir():
        if not series_dir.is_dir():
            continue

        for arch_dir in series_dir.iterdir():
            if not arch_dir.is_dir():
                continue

            infos = [TarballInfo(f) for f in arch_dir.iterdir() if f.is_file()]
            valid_infos = [i for i in infos if i.is_valid()]

            if len(valid_infos) != len(infos):
                # Treat unparsable file names as a warning rather than an error
                _log.warning(f"Found {len(infos) - len(valid_infos)} unparsable tarball file names in {arch_dir}")

            if not valid_infos:
                # Nothing to link here; an empty directory is not a
                # "mixed arches" error.
                continue

            # Sanity check that each arch subdir only contains tarballs for a single arch
            arches = {i.arch for i in valid_infos}
            if len(arches) != 1:
                return False, f"Got mixed set of arches for tarball clients in {arch_dir}"
            arch = next(iter(arches))

            # Only parsable names take part in the grouping; unparsable
            # ones have no usable os/date fields.
            for os_name in {i.os for i in valid_infos}:
                latest_symlink = series_dir / f"{WN_TARBALL_NAME_PREFIX}.{os_name}.{arch}.tar.gz"
                latest = max(
                    (i for i in valid_infos if i.os == os_name),
                    key=lambda i: i.date_string,
                )

                # Replace a link left over from an earlier run.  Anything
                # that is not a symlink is left alone, so symlink_to() still
                # raises FileExistsError rather than deleting real data.
                if latest_symlink.is_symlink():
                    latest_symlink.unlink()
                latest_symlink.symlink_to(latest.full_path.relative_to(series_dir))

    return True, ""



def update_tarball_dirs(options: Options) -> Tuple[bool, str]:
    """
    Bring the local tarball client directories up to date.

    Ensures the working, destination and previous tarball directories
    exist, runs tarball_sync() followed by create_latest_symlinks(), and
    finally calls update_release_repos() on the three directories.

    Returns (True, "") if every step succeeded, otherwise (False, <message>)
    from the first step that failed; later steps are not run.
    """
    subdir = options.tarball_install
    working_dir = Path(options.working_root) / subdir
    dest_dir = Path(options.dest_root) / subdir
    prev_dir = Path(options.previous_root) / subdir

    for directory in (working_dir, dest_dir, prev_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # First pull the tarballs, then build the "latest" symlinks; stop at
    # the first step that reports a failure.
    for step in (tarball_sync, create_latest_symlinks):
        ok, err = step(options)
        if not ok:
            return False, err

    # Publish the working dir (presumably rotating the current dest dir
    # into prev_dir -- confirm against tag_run.update_release_repos).
    update_release_repos(dest_dir, working_dir, prev_dir)
    return True, ""

2 changes: 1 addition & 1 deletion docker/supervisor-distrepos.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[program:distrepos]
command=/bin/bash -c 'for action in link_static rsync mirror cadist; do /bin/distrepos --action $action; done; sleep 360'
command=/bin/bash -c 'for action in link_static rsync mirror cadist tarball_sync; do /bin/distrepos --action $action; done; sleep 360'
autorestart=true

# Log the output of distrepos to supervisord's stdout/err so k8s logging picks it up
Expand Down
6 changes: 6 additions & 0 deletions etc/distrepos.conf
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ release_repo = 24.0/$${EL}/$${ARCH}/release -> condor-release
condor_rsync = rsync://rsync.cs.wisc.edu/htcondor
# The base rsync endpoint to pull Koji distrepos from
koji_rsync = rsync://kojihub2000.chtc.wisc.edu/repos-dist
# The base rsync endpoint to pull tarball installs from
tarball_rsync = rsync://rsync.cs.wisc.edu/vdt/

# The final base dir of the repos on local disk; repos will be moved
# there once rsyncing is successful.
Expand Down Expand Up @@ -99,6 +101,10 @@ mirror_hosts =
http://mirror.hep.wisc.edu/upstream


# Name of the subdirectory within dest_root that holds the tarball clients
tarball_install = tarball-install


#
#
# Tags and tagsets
Expand Down

0 comments on commit ee60e4c

Please sign in to comment.