Construct Dockerfile that includes both pyspark and sparkR
Leemoonsoo committed Jun 23, 2020
1 parent d0e2cf4 commit 1e85abe
Showing 1 changed file with 18 additions and 8 deletions.
.github/workflows/publish-docker-image.yml
@@ -37,18 +37,28 @@ jobs:
       run: |-
         # Remove the -s option from tini, since gvisor does not support PR_SET_CHILD_SUBREAPER
         sed -i 's/tini -s/tini/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
-        # Remove the last line, the 'USER' command; it is added back below
-        sed -i '/^USER/d' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        # Add the pyspark and sparkR libraries, so we have a single image that supports both
-        awk '/RUN mkdir/,/COPY python\/lib/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        awk '/RUN mkdir/,/ENV R_HOME/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+        # Take the base Dockerfile up to where WORKDIR starts
+        awk '!p;/WORKDIR/{p=1}' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | grep -v WORKDIR >> /tmp/Dockerfile
+        # Then append the pyspark and sparkR requirements
+        awk '/RUN mkdir/,/COPY python\/lib/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile >> /tmp/Dockerfile
+        awk '/RUN mkdir/,/ENV R_HOME/' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile >> /tmp/Dockerfile
+        # Then append the rest of the base Dockerfile, from WORKDIR through ENTRYPOINT
+        awk '/WORKDIR/{p=1}/ENTRYPOINT/{print;p=0}p' resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile >> /tmp/Dockerfile
         # Add a passwd entry; otherwise entrypoint.sh shows 'Container ENTRYPOINT failed to add passwd entry for anonymous UID'
         # and the executor fails with javax.security.auth.login.LoginException: java.lang.NullPointerException: invalid null input: name at com.sun.security.auth.UnixPrincipal.<init>(UnixPrincipal.java:71)
-        echo 'RUN groupadd --gid $spark_uid spark && useradd -ms /bin/bash spark --uid $spark_uid --gid $spark_uid && chown -R spark:spark /opt/spark/work-dir' >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
-        echo 'USER ${spark_uid}' >> resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+        echo 'RUN groupadd --gid $spark_uid spark && useradd -ms /bin/bash spark --uid $spark_uid --gid $spark_uid && chown -R spark:spark /opt/spark/work-dir' >> /tmp/Dockerfile
+        echo 'USER ${spark_uid}' >> /tmp/Dockerfile
+        # Print the assembled Dockerfile for the build log, then move it into place
+        cat /tmp/Dockerfile
+        mv /tmp/Dockerfile resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
     - name: Build distribution
       run: |-
-        ./dev/make-distribution.sh --name spark --pip --r --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes
+        ./dev/make-distribution.sh --name spark --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes
       env:
         DEBIAN_FRONTEND: noninteractive
         DEBCONF_NONINTERACTIVE_SEEN: true
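
The awk one-liners do all of the Dockerfile surgery here, so their semantics are worth spelling out. The range pattern '/A/,/B/' prints every line from the first match of A through the next match of B, inclusive; that is how the pyspark and sparkR sections are lifted out of the binding Dockerfiles. A minimal sketch against a toy Dockerfile (hypothetical contents, for illustration only, not the real Spark files):

    # Build a toy Dockerfile (hypothetical, for illustration only)
    printf '%s\n' \
      'FROM openjdk:8-jre-slim' \
      'RUN mkdir -p /opt/spark/python' \
      'COPY python/lib /opt/spark/python/lib' \
      'WORKDIR /opt/spark/work-dir' \
      'ENTRYPOINT [ "/opt/entrypoint.sh" ]' > /tmp/demo.Dockerfile

    # Range pattern: print from the first 'RUN mkdir' line through the next
    # 'COPY python/lib' line, inclusive; here, the pyspark-style section.
    awk '/RUN mkdir/,/COPY python\/lib/' /tmp/demo.Dockerfile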
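The other two one-liners split the base Dockerfile around its WORKDIR line, so the extracted sections can be spliced in between the head and the tail. Against the same toy file (again just a sketch):

    # Head: '!p' prints while p is unset; the WORKDIR line sets p=1, so output
    # stops after that line, and grep -v then drops the WORKDIR line itself.
    awk '!p;/WORKDIR/{p=1}' /tmp/demo.Dockerfile | grep -v WORKDIR

    # Tail: the WORKDIR line sets p=1 and is printed by the bare 'p' rule; the
    # ENTRYPOINT line is printed explicitly and clears p, ending the output.
    awk '/WORKDIR/{p=1}/ENTRYPOINT/{print;p=0}p' /tmp/demo.Dockerfile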
