diff --git a/.github/workflows/publish-docker-image.yml b/.github/workflows/publish-docker-image.yml
index 81fcb8fdc62d1..552ef209f0cd2 100644
--- a/.github/workflows/publish-docker-image.yml
+++ b/.github/workflows/publish-docker-image.yml
@@ -37,18 +37,28 @@ jobs:
       run: |-
         # Remove -s option of tini. while gvisor does not support PR_SET_CHILD_SUBREAPER
         sed -i 's/tini -s/tini/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
-        # remove last line, 'USER command'. will add back
-        sed -i '/^USER/d' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        # add pyspark and sparkR library. so we have single image that supports pyspark and sparkR
-        awk '/RUN mkdir/,/COPY python\/lib/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        awk '/RUN mkdir/,/ENV R_HOME/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+
+        # take everything in the Dockerfile before WORKDIR
+        awk '!p;/WORKDIR/{p=1}' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | grep -v WORKDIR >> /tmp/Dockerfile
+
+        # then append the pyspark and sparkR layers, so a single image supports both
+        awk '/RUN mkdir/,/COPY python\/lib/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile >> /tmp/Dockerfile
+        awk '/RUN mkdir/,/ENV R_HOME/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile >> /tmp/Dockerfile
+
+        # finally append the Dockerfile from WORKDIR through ENTRYPOINT
+        awk '/WORKDIR/{p=1}/ENTRYPOINT/{print;p=0}p' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile >> /tmp/Dockerfile
+
         # Add passwd entry. otherwise, entrypoint.sh will shows 'Container ENTRYPOINT failed to add passwd entry for anonymous UID'
         # and executor will fail with javax.security.auth.login.LoginException: java.lang.NullPointerException: invalid null input: name at com.sun.security.auth.UnixPrincipal.(UnixPrincipal.java:71)
-        echo 'RUN groupadd --gid $spark_uid spark && useradd -ms /bin/bash spark --uid $spark_uid --gid $spark_uid && chown -R spark:spark /opt/spark/work-dir' >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        echo 'USER ${spark_uid}' >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+        echo 'RUN groupadd --gid $spark_uid spark && useradd -ms /bin/bash spark --uid $spark_uid --gid $spark_uid && chown -R spark:spark /opt/spark/work-dir' >> /tmp/Dockerfile
+        echo 'USER ${spark_uid}' >> /tmp/Dockerfile
+
+        # inspect the assembled Dockerfile, then move it into place
+        cat /tmp/Dockerfile
+        mv /tmp/Dockerfile resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
     - name: Build distribution
       run: |-
-        ./dev/make-distribution.sh --name spark --pip --r --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes
+        ./dev/make-distribution.sh --name spark --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes
       env:
         DEBIAN_FRONTEND: noninteractive
         DEBCONF_NONINTERACTIVE_SEEN: true
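
For reference, the splicing step above leans on two awk flag-variable idioms that are easy to misread. Below is a minimal, self-contained sketch of both; the sample Dockerfile content is hypothetical, and only the awk programs are taken verbatim from the workflow.

    # A hypothetical stand-in for the Spark Dockerfile, just for the demo.
    cat > /tmp/demo.Dockerfile <<'EOF'
    FROM openjdk:8-jre-slim
    RUN mkdir -p /opt/spark/work-dir
    WORKDIR /opt/spark/work-dir
    ENTRYPOINT [ "/opt/entrypoint.sh" ]
    USER ${spark_uid}
    EOF

    # Head: everything before WORKDIR. '!p' prints while the flag is unset,
    # and it runs before '/WORKDIR/{p=1}', so the WORKDIR line itself still
    # slips through -- hence the trailing 'grep -v WORKDIR'.
    awk '!p;/WORKDIR/{p=1}' /tmp/demo.Dockerfile | grep -v WORKDIR
    # -> FROM openjdk:8-jre-slim
    # -> RUN mkdir -p /opt/spark/work-dir

    # Tail: the WORKDIR..ENTRYPOINT range, inclusive. '/WORKDIR/{p=1}' raises
    # the flag, the bare 'p' prints while it is set, and '/ENTRYPOINT/{print;p=0}'
    # emits the closing line and lowers the flag, dropping the old USER line.
    awk '/WORKDIR/{p=1}/ENTRYPOINT/{print;p=0}p' /tmp/demo.Dockerfile
    # -> WORKDIR /opt/spark/work-dir
    # -> ENTRYPOINT [ "/opt/entrypoint.sh" ]

Appending the pyspark/sparkR layers between the head and the tail reproduces what the two bindings Dockerfiles would have added, while keeping USER as the final instruction so the passwd entry added by the echo lines takes effect.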