Skip to content

Commit

Permalink
Merge pull request #15 from BCWResearch/pgo-rebase
Browse files Browse the repository at this point in the history
PGO
  • Loading branch information
patrick-bcw authored Jul 26, 2024
2 parents 7492d07 + 962bd8d commit 96e65bd
Show file tree
Hide file tree
Showing 10 changed files with 435 additions and 16 deletions.
165 changes: 161 additions & 4 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches:
- develop
- deployment
- pgo-rebase

env:
DEFAULT_TAGS: |
Expand All @@ -16,12 +16,160 @@ env:
IMAGE_BASE_NAME: us.gcr.io/${{ secrets.GCP_PROJECT_ID }}

jobs:
build-pgo:
  # Builds the profiling ("pgo") worker image from pgo-worker.Dockerfile and
  # pushes it to the us.gcr.io registry.
  # if: ${{ false }} # disable for now
  name: Build pgo docker image
  runs-on: ubuntu-latest
  permissions:
    # id-token: write lets the job request the OIDC token that the
    # workload-identity authentication step below relies on.
    id-token: write
    contents: read
  steps:
    - name: Checkout sources
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    # Computes the image tags/labels consumed by the build-and-push step.
    - name: Get docker image meta (pgo-worker)
      uses: docker/metadata-action@v5
      id: pgo-worker-meta
      with:
        tags: ${{ env.DEFAULT_TAGS }}
        images: ${{ env.IMAGE_BASE_NAME }}/zero-bin-pgo-worker

    # Exchanges the workload identity for a short-lived access token, which is
    # then used as the registry password.
    - name: Authenticate to Google Cloud
      uses: google-github-actions/auth@v2
      id: auth
      with:
        service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
        workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_ID }}
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        access_token_lifetime: 900s
        token_format: access_token

    - name: Login to GCR
      uses: docker/login-action@v3
      with:
        password: ${{ steps.auth.outputs.access_token }}
        username: oauth2accesstoken
        registry: us.gcr.io

    - name: Build and push pgo worker
      uses: docker/build-push-action@v5
      with:
        file: pgo-worker.Dockerfile
        context: .
        push: true
        labels: ${{ steps.pgo-worker-meta.outputs.labels }}
        tags: ${{ steps.pgo-worker-meta.outputs.tags }}
        # cache-from: type=gha
        # cache-to: type=gha,mode=max

deploy-pgo:
  # Rolls the profiling worker image built by `build-pgo` out to the GKE
  # cluster by rendering the zero-bin Helm chart and applying only the
  # worker component.
  name: Deploy pgo-worker to GKE
  # if: ${{ false }} # disable for now
  runs-on: ubuntu-latest
  needs: build-pgo
  permissions:
    contents: 'read'
    id-token: 'write'
  steps:
    - name: Checkout sources
      uses: actions/checkout@v4

    - name: Authenticate to Google Cloud
      id: auth
      uses: google-github-actions/auth@v2
      with:
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_ID }}
        service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}

    - name: Get GKE credentials
      id: 'get-credentials'
      uses: 'google-github-actions/get-gke-credentials@v2'
      with:
        cluster_name: 'immutable-prod'
        location: 'us-central1'

    - name: Install helm
      id: install
      # NOTE(review): the exact release pinned here was lost (the reference was
      # mangled into an "[email protected]" placeholder). The v4 major tag is
      # used instead; re-pin to the intended vX.Y.Z release if required.
      uses: azure/setup-helm@v4
      with:
        version: '3.14.3'

    - name: Deploy pgo-worker to GKE
      id: deploy
      # An explicit `shell: bash` makes GitHub run the script with
      # `-eo pipefail`, so a failing `helm template` fails the step instead of
      # being hidden behind the exit status of `kubectl apply`.
      shell: bash
      run: |-
        cd ./deploy/helm
        # Render the chart with the pgo worker image for this commit and apply
        # only the objects labelled as the worker component.
        helm template zero-bin ./zero-bin \
          -f ./zero-bin/values.yaml \
          --set "hull.config.specific.version=sha-${GITHUB_SHA}" \
          --set hull.config.specific.workerImageName=zero-bin-pgo-worker \
          -n zkevm \
          | kubectl apply -f - -l app.kubernetes.io/component=worker
run-benchmark:
  # Runs the benchmark script against the deployed pgo worker, then scales the
  # worker deployment back down to zero replicas.
  name: Run benchmarks tests
  # if: ${{ false }} # disable for now
  runs-on: gha-runner-set
  permissions:
    contents: 'read'
    id-token: 'write'
  needs: deploy-pgo
  env:
    USE_GKE_GCLOUD_AUTH_PLUGIN: 'true'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Authenticate to Google Cloud
      id: auth
      uses: google-github-actions/auth@v2
      with:
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_ID }}
        service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}

    - name: Set up Google Cloud SDK
      uses: google-github-actions/setup-gcloud@v2
      with:
        install_components: 'gke-gcloud-auth-plugin'

    - name: Get GKE credentials
      id: 'get-credentials'
      uses: 'google-github-actions/get-gke-credentials@v2'
      with:
        cluster_name: 'immutable-prod'
        location: 'us-central1'
        use_auth_provider: true

    - uses: azure/setup-kubectl@v3
      with:
        version: 'v1.28.2'

    - name: Run benchmark script
      id: benchmark
      shell: bash
      env:
        GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
        GCP_WORKLOAD_IDENTITY_ID: ${{ secrets.GCP_WORKLOAD_IDENTITY_ID }}
        GCP_SERVICE_ACCOUNT: ${{ secrets.GCP_SERVICE_ACCOUNT }}
        MACHINE_TYPE: 't2d-standard-60'
        NUM_WORKERS: '1'
        CPU: '55'
        MEMORY: '32Gi'
        BLOCK_START: '177'
        BLOCK_END: '177'
        OTHER_ARGS: 'pgo.test.run'
        RPC_ENDPOINT: 'INTERNAL_RPC'
        CPU_THRESHOLD: '1200'
      # Values are read from the step environment rather than interpolated into
      # the script text with ${{ }}, so they are passed as single, quoted
      # shell words and the secret never becomes part of the script source.
      run: |-
        export USE_GKE_GCLOUD_AUTH_PLUGIN=True
        gcloud container clusters get-credentials immutable-prod --project "${GCP_PROJECT_ID}" --region us-central1
        # CPU and MEMORY are each passed twice, exactly as before.
        ./tools/run-benchmark.sh "${MACHINE_TYPE}" "${NUM_WORKERS}" "${CPU}" "${CPU}" "${MEMORY}" "${MEMORY}" "${BLOCK_START}" "${BLOCK_END}" "${OTHER_ARGS}" "${RPC_ENDPOINT}"

    # Kept as its own step with `always()` so the worker is scaled down even
    # when the benchmark step fails or the run is cancelled. It is skipped only
    # when the benchmark step never started.
    - name: Scale pgo worker down
      if: ${{ always() && steps.benchmark.outcome != 'skipped' }}
      run: kubectl scale --replicas=0 deployment/zero-bin-worker -n zkevm
build:
name: Build docker images
runs-on: ubuntu-latest
permissions:
contents: 'read'
id-token: 'write'
needs: run-benchmark
steps:
- name: Checkout sources
uses: actions/checkout@v4
Expand All @@ -36,7 +184,7 @@ jobs:
images: ${{ env.IMAGE_BASE_NAME }}/zero-bin-leader
tags: ${{ env.DEFAULT_TAGS }}

- name: Get docker image meta (worker)
- name: Get docker image meta (worker optimized)
id: worker-meta
uses: docker/metadata-action@v5
with:
Expand All @@ -51,7 +199,16 @@ jobs:
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_ID }}
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
token_format: 'access_token'
access_token_lifetime: '900s'
access_token_lifetime: '1800s'

- name: Set up Google Cloud SDK
  # v2 matches the auth@v2 action and the setup-gcloud version already used by
  # the run-benchmark job in this workflow.
  uses: google-github-actions/setup-gcloud@v2

- name: Pull pgo file from GCS
  # Downloads one of today's profiles from gs://zkevm-csv/profiles into
  # ./target/pgo-profiles for the optimized worker build.
  shell: bash
  run: |-
    mkdir -p ./target/pgo-profiles
    today="$(date -I)"
    # A listing failure aborts the step here (bash -e).
    listing="$(gsutil -m ls -l gs://zkevm-csv/profiles)"
    # Keep only entries carrying today's date, order them by the timestamp
    # column so the most recently created object comes last, then strip
    # everything in front of the gs:// URL. `|| true` lets the explicit check
    # below report "nothing found" instead of pipefail aborting silently.
    latest_profile="$(printf '%s\n' "${listing}" | grep -F "${today}" | sort -k2,2 | tail -n 1 | sed 's/.*\(gs:\/\/\)/\1/' || true)"
    if [ -z "${latest_profile}" ]; then
      echo "::error::No PGO profile dated ${today} found in gs://zkevm-csv/profiles" >&2
      exit 1
    fi
    gsutil cp "${latest_profile}" ./target/pgo-profiles
    ls -lh ./target/pgo-profiles
- name: Login to GCR
uses: docker/login-action@v3
Expand All @@ -64,7 +221,7 @@ jobs:
uses: docker/build-push-action@v5
with:
context: .
file: worker.Dockerfile
file: optimized-worker.Dockerfile
push: true
tags: ${{ steps.worker-meta.outputs.tags }}
labels: ${{ steps.worker-meta.outputs.labels }}
Expand Down
7 changes: 3 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ num = "0.4.3"
num-bigint = "0.4.5"
num-traits = "0.2.19"
once_cell = "1.19.0"
paladin-core = "0.4.2"
paladin-core = { git = "https://github.com/julian-bcw/paladin-bcw", rev = "b6fa8d01d5b56d0e3257894dfa9052f4fc2af76f" }
parking_lot = "0.12.3"
paste = "1.0.15"
pest = "2.7.10"
Expand Down
5 changes: 4 additions & 1 deletion deploy/helm/zero-bin/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ hull:
fullnameOverride: zero-bin
specific:
version: develop
workerImageName: zero-bin-worker
objects:
serviceaccount:
default:
Expand Down Expand Up @@ -98,7 +99,7 @@ hull:
containers:
worker:
image:
repository: us.gcr.io/immutable-418115/zero-bin-worker
repository: _HT!us.gcr.io/immutable-418115/{{ (index . "$").Values.hull.config.specific.workerImageName }}
tag: _HT*hull.config.specific.version
imagePullPolicy: Always
env:
Expand All @@ -108,6 +109,8 @@ hull:
staticName: true
name: rmq-credentials
key: amqp_uri
GCS_UPLOAD_DIR:
value: profiles/
RUST_LOG:
value: info
RUST_BACKTRACE:
Expand Down
45 changes: 45 additions & 0 deletions optimized-worker.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Builds the PGO-optimized worker image.
# Stage 1 compiles the `worker` binary with `cargo pgo optimize`, using the
# .profraw files found under ./target/pgo-profiles in the build context.
# Stage 2 copies that binary into a slim Debian runtime image.
FROM rustlang/rust:nightly-bullseye-slim AS builder

# Install jemalloc and the native build dependencies
RUN apt-get update && apt-get install -y libjemalloc2 libjemalloc-dev make libssl-dev pkg-config

# Install llvm-tools-preview (requirement for cargo-pgo to compile an optimized binary)
RUN rustup component add llvm-tools-preview

# Install cargo-pgo, used for building an optimized binary from the gathered profiling data
RUN cargo install cargo-pgo

RUN mkdir -p zero_bin
COPY Cargo.toml .
# Cleanup all workspace members and add selected crates again
RUN sed -i '/members =/{:a;N;/]/!ba};//d' Cargo.toml
RUN sed -i 's#\[workspace\]#\[workspace\]\nmembers = \["zero_bin\/worker", "zero_bin\/common", "zero_bin\/ops"\, "evm_arithmetization", "mpt_trie", "proc_macro"\]#' Cargo.toml
COPY Cargo.lock .
COPY ./rust-toolchain.toml ./

COPY proof_gen proof_gen
COPY mpt_trie mpt_trie
COPY evm_arithmetization evm_arithmetization
COPY proc_macro proc_macro
COPY zero_bin/common zero_bin/common
COPY zero_bin/ops zero_bin/ops
COPY zero_bin/worker zero_bin/worker

# Refresh the modification time of every crate entry point after the copy.
RUN \
    touch zero_bin/common/src/lib.rs && \
    touch zero_bin/ops/src/lib.rs && \
    touch zero_bin/worker/src/main.rs && \
    touch evm_arithmetization/src/lib.rs && \
    touch mpt_trie/src/lib.rs && \
    touch proc_macro/src/lib.rs

# Same flags as pgo-worker.Dockerfile: the lld linker is disabled because it
# is causing issues with the linkme package.
# https://github.com/rust-lang/rust/pull/124129
# https://github.com/dtolnay/linkme/pull/88
# NOTE(review): `target-cpu=native` tunes the binary for the CPU of the machine
# running `docker build`; confirm the deployment nodes support the same
# instruction set.
ENV RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld'

# The build fails here if no .profraw file was placed in the build context.
COPY ./target/pgo-profiles/*.profraw ./target/pgo-profiles/

RUN cargo pgo optimize build -- --bin worker

FROM debian:bullseye-slim
# The apt package lists are removed in the same layer so they do not end up in
# the final image.
# NOTE(review): `make` and `libssl-dev` are build-time packages; presumably
# only the shared libraries are needed at runtime - confirm before trimming.
RUN apt-get update \
    && apt-get install -y ca-certificates libjemalloc2 make libssl-dev \
    && rm -rf /var/lib/apt/lists/*
COPY --from=builder ./target/x86_64-unknown-linux-gnu/release/worker /usr/local/bin/worker
CMD ["worker"]
67 changes: 67 additions & 0 deletions pgo-worker.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Builds the profiling ("pgo") worker image: a `worker` binary instrumented
# with `cargo pgo build`, started through a Python wrapper that uploads the
# resulting .profraw profile to GCS.
#
# NOTE: only a single worker needs to be deployed in k8s for this step.

# BONUS NOTE: recommended block=4825, checkpoint=4824
# but this will need to be specified by the coordinator

FROM rustlang/rust:nightly-bullseye-slim AS builder

RUN apt-get update && apt-get install -y libjemalloc2 libjemalloc-dev make libssl-dev pkg-config

# Install cargo-pgo, used for building a binary with profiling enabled
RUN cargo install cargo-pgo

RUN mkdir -p zero_bin
COPY Cargo.toml .
# Cleanup all workspace members and add selected crates again
RUN sed -i '/members =/{:a;N;/]/!ba};//d' Cargo.toml
RUN sed -i 's#\[workspace\]#\[workspace\]\nmembers = \["zero_bin\/worker", "zero_bin\/common", "zero_bin\/ops"\, "evm_arithmetization", "mpt_trie", "proc_macro"\]#' Cargo.toml
COPY Cargo.lock .
COPY ./rust-toolchain.toml ./

COPY pgo_worker_wrapper.py ./zero_bin/pgo_worker_wrapper.py

COPY proof_gen proof_gen
COPY mpt_trie mpt_trie
COPY evm_arithmetization evm_arithmetization
COPY proc_macro proc_macro
COPY zero_bin/common zero_bin/common
COPY zero_bin/ops zero_bin/ops
COPY zero_bin/worker zero_bin/worker

# Refresh the modification time of every crate entry point after the copy.
RUN \
    touch zero_bin/common/src/lib.rs && \
    touch zero_bin/ops/src/lib.rs && \
    touch zero_bin/worker/src/main.rs && \
    touch evm_arithmetization/src/lib.rs && \
    touch mpt_trie/src/lib.rs && \
    touch proc_macro/src/lib.rs

# Disable the lld linker for now, as it's causing issues with the linkme package.
# https://github.com/rust-lang/rust/pull/124129
# https://github.com/dtolnay/linkme/pull/88
ENV RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld'

RUN cargo pgo build -- --bin worker

# NOTE: cannot use a separate runtime environment, because the pgo-binary doesn't seem to be generating its profiling data (found during testing).
#FROM debian:bullseye-slim
#RUN apt-get update && apt-get install -y ca-certificates libjemalloc2
#COPY --from=builder ./target/x86_64-unknown-linux-gnu/release/worker /usr/local/bin/worker
#COPY pgo_worker_wrapper.py /usr/local/bin/pgo_worker_wrapper.py

# Install python3 and pip for the wrapper script.
# `apt-get update` runs in the same layer as the install so a cached, stale
# package index from an earlier layer can never be used for this install.
RUN apt-get update && apt-get install -y python3 python3-pip

# Install the google-cloud-storage dependency for the wrapper script
# (--no-cache-dir keeps pip's download cache out of the image)
RUN pip3 install --no-cache-dir google-cloud-storage

# NOTE: the bucket name should be set WITHOUT the `gs://` prefix
# BONUS NOTE: should we create a different bucket just for .profraw files?
ENV GCS_UPLOAD_BUCKET=zkevm-csv
ENV WORKER_PATH=./target/x86_64-unknown-linux-gnu/release/worker
ENV PROFILE_DIRECTORY=./target/pgo-profiles/
# run the python wrapper, which will:
# 1. execute the pgo-worker binary
# 2. wait to receive a termination signal, then send a SIGTERM to the pgo-worker binary
#    NOTE(review): this previously read "either SIGTERM or SIGKILL", but SIGKILL
#    cannot be caught by a process; confirm which signals the wrapper handles.
# 3. upload the created pgo .profraw file to GCS
CMD ["python3", "zero_bin/pgo_worker_wrapper.py"]
Loading

0 comments on commit 96e65bd

Please sign in to comment.