From 6973bc4f75214ad98b33e54e8f75cd0a39819d24 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 4 Dec 2024 21:50:57 -0800 Subject: [PATCH 1/5] Update Dockerfile to use RockyLinux 9 and FDB 7.1 This needs to be done so that we can create agent images before upgrading the Joshua cluster. The RockyLinux 9 based image allows the Joshua to run ensembles built with RHEL9. --- Dockerfile | 76 ++++++++++++++++++++---------------------------------- setup.py | 4 +-- 2 files changed, 30 insertions(+), 50 deletions(-) diff --git a/Dockerfile b/Dockerfile index cf22577..ae32f72 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,47 +1,40 @@ -FROM centos:7 +FROM rockylinux:9 as build # this is joshua-agent -ARG DEVTOOLSET_VERSION=11 WORKDIR /tmp -RUN yum repolist && \ - yum install -y \ - centos-release-scl-rh \ +RUN dnf update -y && \ + dnf install -y \ epel-release \ - scl-utils \ - yum-utils && \ - yum -y install \ + dnf-plugins-core && \ + dnf config-manager --set-enabled crb && \ + dnf install -y \ bzip2 \ criu \ - devtoolset-${DEVTOOLSET_VERSION} \ - devtoolset-${DEVTOOLSET_VERSION}-libasan-devel \ - devtoolset-${DEVTOOLSET_VERSION}-liblsan-devel \ - devtoolset-${DEVTOOLSET_VERSION}-libtsan-devel \ - devtoolset-${DEVTOOLSET_VERSION}-libubsan-devel \ - devtoolset-${DEVTOOLSET_VERSION}-libatomic-devel \ - devtoolset-${DEVTOOLSET_VERSION}-systemtap-sdt-devel \ gettext \ golang \ java-11-openjdk-devel \ mono-core \ net-tools \ - rh-python38 \ - rh-python38-python-devel \ - rh-python38-python-pip \ - rh-ruby27 \ - rh-ruby27-ruby-devel \ - libatomic && \ - source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable && \ - source /opt/rh/rh-python38/enable && \ - source /opt/rh/rh-ruby27/enable && \ + python3 \ + python3-devel \ + python3-pip \ + ruby \ + ruby-devel \ + libffi-devel \ + libatomic && \ pip3 install \ python-dateutil \ subprocess32 \ psutil \ kubernetes \ urllib3==1.26.14 \ - boto3 && \ - gem install ffi --platform=ruby && \ + boto3 + +# =========================== END OF LAYER: build ============================== +FROM build as devel + +RUN gem install ffi --platform=ruby && \ groupadd -r joshua -g 4060 && \ useradd \ -rm \ @@ -55,8 +48,8 @@ RUN yum repolist && \ rm -rf /tmp/* # valgrind -RUN source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable && \ - curl -Ls --retry 5 --fail https://sourceware.org/pub/valgrind/valgrind-3.20.0.tar.bz2 -o valgrind.tar.bz2 && \ + +RUN curl -Ls --retry 5 --fail https://sourceware.org/pub/valgrind/valgrind-3.20.0.tar.bz2 -o valgrind.tar.bz2 && \ echo "8536c031dbe078d342f121fa881a9ecd205cb5a78e639005ad570011bdb9f3c6 valgrind.tar.bz2" > valgrind-sha.txt && \ sha256sum -c valgrind-sha.txt && \ mkdir valgrind && \ @@ -71,35 +64,27 @@ RUN source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable && \ COPY childsubreaper/ /opt/joshua/install/childsubreaper COPY joshua/ /opt/joshua/install/joshua COPY setup.py /opt/joshua/install/ - -RUN source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable && \ - source /opt/rh/rh-python38/enable && \ - source /opt/rh/rh-ruby27/enable && \ - pip3 install /opt/joshua/install && \ +RUN pip3 install /opt/joshua/install && \ rm -rf /opt/joshua/install ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/ -ARG OLD_TLS_LIBRARY_DIR=/app/deploy/runtime/.tls_5_1/ -ARG FDB_VERSION="6.3.18" +ARG FDB_VERSION="7.1.57" RUN if [ "$(uname -p)" == "x86_64" ]; then \ mkdir -p ${OLD_FDB_BINARY_DIR} \ - ${OLD_TLS_LIBRARY_DIR} \ /usr/lib/foundationdb/plugins && \ - for old_fdb_server_version in 7.3.43 7.3.27 7.1.61 7.1.57 7.1.43 7.1.35 7.1.33 7.1.27 7.1.25 7.1.23 7.1.19 6.3.18 6.3.17 6.3.16 6.3.15 6.3.13 6.3.12 6.3.9 6.2.30 6.2.29 6.2.28 6.2.27 6.2.26 6.2.25 6.2.24 6.2.23 6.2.22 6.2.21 6.2.20 6.2.19 6.2.18 6.2.17 6.2.16 6.2.15 6.2.10 6.1.13 6.1.12 6.1.11 6.1.10 6.0.18 6.0.17 6.0.16 6.0.15 6.0.14 5.2.8 5.2.7 5.1.7 5.1.6; do \ + # Skip these old versions 6.2.30 6.2.29 6.2.28 6.2.27 6.2.26 6.2.25 6.2.24 6.2.23 6.2.22 6.2.21 6.2.20 6.2.19 6.2.18 6.2.17 6.2.16 6.2.15 6.2.10 6.1.13 6.1.12 6.1.11 6.1.10 6.0.18 6.0.17 6.0.16 6.0.15 6.0.14 5.2.8 5.2.7 5.1.7 5.1.6 + for old_fdb_server_version in 7.3.43 7.3.27 7.1.61 7.1.57 7.1.43 7.1.35 7.1.33 7.1.27 7.1.25 7.1.23 7.1.19 6.3.18 6.3.17 6.3.16 6.3.15 6.3.13 6.3.12 6.3.9; do \ curl -Ls --retry 5 --fail https://github.com/apple/foundationdb/releases/download/${old_fdb_server_version}/fdbserver.x86_64 -o ${OLD_FDB_BINARY_DIR}/fdbserver-${old_fdb_server_version}; \ done && \ chmod +x ${OLD_FDB_BINARY_DIR}/* && \ - curl -Ls --retry 5 --fail https://fdb-joshua.s3.amazonaws.com/old_tls_library.tgz | tar -xz -C ${OLD_TLS_LIBRARY_DIR} --strip-components=1 && \ curl -Ls --retry 5 --fail https://github.com/apple/foundationdb/releases/download/${FDB_VERSION}/libfdb_c.x86_64.so -o /usr/lib64/libfdb_c_${FDB_VERSION}.so && \ ln -s /usr/lib64/libfdb_c_${FDB_VERSION}.so /usr/lib64/libfdb_c.so && \ - ln -s ${OLD_TLS_LIBRARY_DIR}/FDBGnuTLS.so /usr/lib/foundationdb/plugins/fdb-libressl-plugin.so && \ - ln -s ${OLD_TLS_LIBRARY_DIR}/FDBGnuTLS.so /usr/lib/foundationdb/plugins/FDBGnuTLS.so; \ fi # Download swift binaries ARG SWIFT_SIGNING_KEY=8A7495662C3CD4AE18D95637FAF6989E1BC16FEA ARG SWIFT_PLATFORM=centos -ARG OS_MAJOR_VER=7 +ARG OS_MAJOR_VER=9 ARG SWIFT_WEBROOT=https://download.swift.org/development ENV SWIFT_SIGNING_KEY=$SWIFT_SIGNING_KEY \ @@ -110,8 +95,7 @@ ENV SWIFT_SIGNING_KEY=$SWIFT_SIGNING_KEY \ RUN echo "${SWIFT_WEBROOT}/latest-build.yml" -# aarch64 package is not available for CentOS7 -# https://www.swift.org/download/ +# Note: Swift package details may need further investigation for Rocky Linux 9 RUN if [ "$(uname -p)" == "x86_64" ]; then \ set -e; \ export $(curl -Ls ${SWIFT_WEBROOT}/latest-build.yml | grep 'download:' | sed 's/:[^:\/\/]/=/g') && \ @@ -137,11 +121,7 @@ ENV FDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster ENV AGENT_TIMEOUT=300 USER joshua -CMD source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable && \ - source /opt/rh/rh-python38/enable && \ - source /opt/rh/rh-ruby27/enable && \ - python3 -m joshua.joshua_agent \ +CMD python3 -m joshua.joshua_agent \ -C ${FDB_CLUSTER_FILE} \ --work_dir /var/joshua \ --agent-idle-timeout ${AGENT_TIMEOUT} - diff --git a/setup.py b/setup.py index e2e32f2..fc737e3 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ Module( "joshua-client", "Joshua Client - interface to a great big supercomputer", - ["argparse", "foundationdb==6.3.18", "python-dateutil", "lxml"], + ["argparse", "foundationdb==7.1.57", "python-dateutil", "lxml"], [], [childsubreaper], [ @@ -29,7 +29,7 @@ Module( "joshua", "Joshua - a supercomputer that runs simulations of war^H^H^Hdatabases", - ["argparse", "foundationdb==6.3.18", "subprocess32"], + ["argparse", "foundationdb==7.1.57", "subprocess32"], [], [childsubreaper], ["Operating System :: POSIX :: Linux"], From 564a8210add799401e4f632ded5d971345efb498 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 4 Dec 2024 22:27:05 -0800 Subject: [PATCH 2/5] Fix errors and tidy up --- Dockerfile | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index ae32f72..11fd27a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rockylinux:9 as build +FROM rockylinux:9 # this is joshua-agent WORKDIR /tmp @@ -22,19 +22,15 @@ RUN dnf update -y && \ ruby \ ruby-devel \ libffi-devel \ - libatomic && \ + libatomic && \ pip3 install \ python-dateutil \ subprocess32 \ psutil \ kubernetes \ urllib3==1.26.14 \ - boto3 - -# =========================== END OF LAYER: build ============================== -FROM build as devel - -RUN gem install ffi --platform=ruby && \ + boto3 && \ + gem install ffi --platform=ruby && \ groupadd -r joshua -g 4060 && \ useradd \ -rm \ @@ -61,30 +57,33 @@ RUN curl -Ls --retry 5 --fail https://sourceware.org/pub/valgrind/valgrind-3.20. cd .. && \ rm -rf /tmp/* +# Install Joshua client COPY childsubreaper/ /opt/joshua/install/childsubreaper COPY joshua/ /opt/joshua/install/joshua COPY setup.py /opt/joshua/install/ -RUN pip3 install /opt/joshua/install && \ +RUN ARTIFACT=client pip3 install /opt/joshua/install && \ rm -rf /opt/joshua/install +# install old fdbserver binaries and libfdb_c.so +# Skip these old versions: 6.2.30 6.2.29 6.2.28 6.2.27 6.2.26 6.2.25 6.2.24 6.2.23 6.2.22 6.2.21 6.2.20 6.2.19 6.2.18 6.2.17 6.2.16 6.2.15 6.2.10 6.1.13 6.1.12 6.1.11 6.1.10 6.0.18 6.0.17 6.0.16 6.0.15 6.0.14 5.2.8 5.2.7 5.1.7 5.1.6 +# because 7.3 no longer supports upgrade from these versions. ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/ ARG FDB_VERSION="7.1.57" RUN if [ "$(uname -p)" == "x86_64" ]; then \ mkdir -p ${OLD_FDB_BINARY_DIR} \ /usr/lib/foundationdb/plugins && \ - # Skip these old versions 6.2.30 6.2.29 6.2.28 6.2.27 6.2.26 6.2.25 6.2.24 6.2.23 6.2.22 6.2.21 6.2.20 6.2.19 6.2.18 6.2.17 6.2.16 6.2.15 6.2.10 6.1.13 6.1.12 6.1.11 6.1.10 6.0.18 6.0.17 6.0.16 6.0.15 6.0.14 5.2.8 5.2.7 5.1.7 5.1.6 for old_fdb_server_version in 7.3.43 7.3.27 7.1.61 7.1.57 7.1.43 7.1.35 7.1.33 7.1.27 7.1.25 7.1.23 7.1.19 6.3.18 6.3.17 6.3.16 6.3.15 6.3.13 6.3.12 6.3.9; do \ curl -Ls --retry 5 --fail https://github.com/apple/foundationdb/releases/download/${old_fdb_server_version}/fdbserver.x86_64 -o ${OLD_FDB_BINARY_DIR}/fdbserver-${old_fdb_server_version}; \ done && \ chmod +x ${OLD_FDB_BINARY_DIR}/* && \ curl -Ls --retry 5 --fail https://github.com/apple/foundationdb/releases/download/${FDB_VERSION}/libfdb_c.x86_64.so -o /usr/lib64/libfdb_c_${FDB_VERSION}.so && \ - ln -s /usr/lib64/libfdb_c_${FDB_VERSION}.so /usr/lib64/libfdb_c.so && \ + ln -s /usr/lib64/libfdb_c_${FDB_VERSION}.so /usr/lib64/libfdb_c.so; \ fi # Download swift binaries ARG SWIFT_SIGNING_KEY=8A7495662C3CD4AE18D95637FAF6989E1BC16FEA ARG SWIFT_PLATFORM=centos -ARG OS_MAJOR_VER=9 +ARG OS_MAJOR_VER=7 ARG SWIFT_WEBROOT=https://download.swift.org/development ENV SWIFT_SIGNING_KEY=$SWIFT_SIGNING_KEY \ From a70da5b32107c776d4812410940db65d489b2c09 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 4 Dec 2024 22:37:29 -0800 Subject: [PATCH 3/5] Disable swift --- Dockerfile | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 11fd27a..9ac1ab5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -95,26 +95,27 @@ ENV SWIFT_SIGNING_KEY=$SWIFT_SIGNING_KEY \ RUN echo "${SWIFT_WEBROOT}/latest-build.yml" # Note: Swift package details may need further investigation for Rocky Linux 9 -RUN if [ "$(uname -p)" == "x86_64" ]; then \ - set -e; \ - export $(curl -Ls ${SWIFT_WEBROOT}/latest-build.yml | grep 'download:' | sed 's/:[^:\/\/]/=/g') && \ - export $(curl -Ls ${SWIFT_WEBROOT}/latest-build.yml | grep 'download_signature:' | sed 's/:[^:\/\/]/=/g') && \ - export DOWNLOAD_DIR=$(echo $download | sed "s/-${OS_VER}.tar.gz//g") && \ - echo $DOWNLOAD_DIR > .swift_tag && \ - export GNUPGHOME="$(mktemp -d)" && \ - curl -fLs ${SWIFT_WEBROOT}/${DOWNLOAD_DIR}/${download} -o latest_toolchain.tar.gz && \ - curl -fLs ${SWIFT_WEBROOT}/${DOWNLOAD_DIR}/${download_signature} -o latest_toolchain.tar.gz.sig && \ - curl -fLs https://swift.org/keys/all-keys.asc | gpg --import - && \ - gpg --batch --verify latest_toolchain.tar.gz.sig latest_toolchain.tar.gz && \ - tar -xzf latest_toolchain.tar.gz --directory / --strip-components=1 && \ - chmod -R o+r /usr/lib/swift && \ - rm -rf "$GNUPGHOME" latest_toolchain.tar.gz.sig latest_toolchain.tar.gz; \ - fi +# swift: error while loading shared libraries: libncurses.so.5: cannot open shared object file: No such file or directory +#RUN if [ "$(uname -p)" == "x86_64" ]; then \ +# set -e; \ +# export $(curl -Ls ${SWIFT_WEBROOT}/latest-build.yml | grep 'download:' | sed 's/:[^:\/\/]/=/g') && \ +# export $(curl -Ls ${SWIFT_WEBROOT}/latest-build.yml | grep 'download_signature:' | sed 's/:[^:\/\/]/=/g') && \ +# export DOWNLOAD_DIR=$(echo $download | sed "s/-${OS_VER}.tar.gz//g") && \ +# echo $DOWNLOAD_DIR > .swift_tag && \ +# export GNUPGHOME="$(mktemp -d)" && \ +# curl -fLs ${SWIFT_WEBROOT}/${DOWNLOAD_DIR}/${download} -o latest_toolchain.tar.gz && \ +# curl -fLs ${SWIFT_WEBROOT}/${DOWNLOAD_DIR}/${download_signature} -o latest_toolchain.tar.gz.sig && \ +# curl -fLs https://swift.org/keys/all-keys.asc | gpg --import - && \ +# gpg --batch --verify latest_toolchain.tar.gz.sig latest_toolchain.tar.gz && \ +# tar -xzf latest_toolchain.tar.gz --directory / --strip-components=1 && \ +# chmod -R o+r /usr/lib/swift && \ +# rm -rf "$GNUPGHOME" latest_toolchain.tar.gz.sig latest_toolchain.tar.gz; \ +# fi # Print Installed Swift Version -RUN if [ "$(uname -p)" == "x86_64" ]; then \ - swift --version; \ - fi +# RUN if [ "$(uname -p)" == "x86_64" ]; then \ +# swift --version; \ +# fi ENV FDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster ENV AGENT_TIMEOUT=300 From 49174ff445f5f1deae8378462043d5b10bc3f24e Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 5 Dec 2024 11:30:55 -0800 Subject: [PATCH 4/5] Update agent scaler to use RockyLinux 9 --- k8s/agent-scaler/Dockerfile | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/k8s/agent-scaler/Dockerfile b/k8s/agent-scaler/Dockerfile index 9fe80b3..45f3d40 100644 --- a/k8s/agent-scaler/Dockerfile +++ b/k8s/agent-scaler/Dockerfile @@ -1,21 +1,18 @@ -FROM centos:7 +FROM rockylinux:9 ARG AGENT_TAG=joshua-agent:latest -# Install Python 3 and Mono -RUN yum repolist && \ - yum install -y \ - centos-release-scl-rh \ +RUN dnf -y update && \ + dnf install -y \ epel-release \ scl-utils \ yum-utils && \ - yum -y install \ + dnf -y install \ gettext \ - rh-python38 \ - rh-python38-python-pip && \ - yum -y clean all --enablerepo='*' && \ + python3-pip && \ + dnf -y clean all --enablerepo='*' && \ case $(uname -m) in \ - x86_64) curl -Ls https://dl.k8s.io/release/v1.20.0/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl;; \ - aarch64) curl -Ls https://dl.k8s.io/release/v1.20.0/bin/linux/arm64/kubectl -o /usr/local/bin/kubectl;; \ + x86_64) curl -Ls https://dl.k8s.io/release/v1.27.5/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl;; \ + aarch64) curl -Ls https://dl.k8s.io/release/v1.27.5/bin/linux/arm64/kubectl -o /usr/local/bin/kubectl;; \ *) echo "unsupported architecture for kubectl"; exit 1 ;; \ esac; \ chmod +x /usr/local/bin/kubectl @@ -31,7 +28,7 @@ RUN chmod +x \ /tools/joshua_model.py # libfdb_c.so -ARG FDB_VERSION="6.3.18" +ARG FDB_VERSION="7.1.57" RUN curl -Ls https://github.com/apple/foundationdb/releases/download/${FDB_VERSION}/libfdb_c.x86_64.so \ -o /lib64/libfdb_c.so && \ chmod +x /lib64/libfdb_c.so @@ -39,10 +36,9 @@ RUN curl -Ls https://github.com/apple/foundationdb/releases/download/${FDB_VERSI ENV LD_LIBRARY_PATH="/lib64:$LD_LIBRARY_PATH" # FDB python binding -RUN source /opt/rh/rh-python38/enable && \ - pip3 install \ - foundationdb==6.3.18 \ - boto3 +RUN pip3 install \ + foundationdb==7.1.57 \ + boto3 ENV BATCH_SIZE=1 ENV MAX_JOBS=10 @@ -52,4 +48,4 @@ ENV AGENT_TAG=${AGENT_TAG} ENV NAMESPACE=joshua # Entry point -ENTRYPOINT source /opt/rh/rh-python38/enable && /tools/agent-scaler.sh +ENTRYPOINT /tools/agent-scaler.sh From 6f228f0100c3a95a19736db346bc6cc65e7ffd87 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 5 Dec 2024 11:51:49 -0800 Subject: [PATCH 5/5] Use Rocky Linux 9.3 --- Dockerfile | 2 +- k8s/agent-scaler/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9ac1ab5..21adbab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rockylinux:9 +FROM rockylinux:9.3 # this is joshua-agent WORKDIR /tmp diff --git a/k8s/agent-scaler/Dockerfile b/k8s/agent-scaler/Dockerfile index 45f3d40..91195ca 100644 --- a/k8s/agent-scaler/Dockerfile +++ b/k8s/agent-scaler/Dockerfile @@ -1,4 +1,4 @@ -FROM rockylinux:9 +FROM rockylinux:9.3 ARG AGENT_TAG=joshua-agent:latest RUN dnf -y update && \