WIP: Docker integ test with async API #1003

Open · wants to merge 2 commits into main
5 changes: 3 additions & 2 deletions docker/integ-test/.env
@@ -5,8 +5,9 @@ MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
PPL_JAR=./ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=./flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
SQL_APP_JAR=./spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
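For context: these JAR paths are now relative to the repository root, and docker-compose.yml prefixes them with ../../ so the bind mounts resolve from docker/integ-test/. A minimal usage sketch, assuming the assembly jars have already been built and the commands are run from the repository root (the exact sbt task names may differ):

# Build the assembly jars referenced above, then bring the stack up with this .env file.
sbt assembly
docker compose --env-file docker/integ-test/.env -f docker/integ-test/docker-compose.yml up -d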
102 changes: 85 additions & 17 deletions docker/integ-test/docker-compose.yml
@@ -1,4 +1,19 @@
services:
metastore:
build: ./metastore
container_name: metastore
ports:
- "${THRIFT_PORT:-9083}:9083"
volumes:
- type: bind
source: ./metastore/hive-site.xml
target: /opt/apache-hive-2.3.9-bin/conf/hive-site.xml
- type: bind
source: ./metastore/hive-log4j2.properties
target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties
networks:
- opensearch-net

spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
container_name: spark
@@ -8,29 +23,35 @@ services:
- "${UI_PORT:-4040}:4040"
- "${SPARK_CONNECT_PORT}:15002"
entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
user: root
Review comment (Member): @normanj-bitquill why is this mandatory?
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
- SPARK_PUBLIC_DNS=localhost
- AWS_ENDPOINT_URL_S3=http://minio-S3
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: ./spark-master-entrypoint.sh
source: ./spark/spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark-defaults.conf
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
- type: bind
source: ./spark/s3.credentials
target: /opt/bitnami/spark/s3.credentials
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
interval: 1m
@@ -40,6 +61,13 @@ services:
start_interval: 5s
networks:
- opensearch-net
depends_on:
metastore:
condition: service_started
opensearch:
condition: service_healthy
opensearch-dashboards:
condition: service_healthy

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
@@ -56,33 +84,50 @@ services:
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-defaults.conf
source: ./spark/spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
source: ./spark/log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
networks:
- opensearch-net
depends_on:
- spark
metastore:
condition: service_started
spark:
condition: service_healthy

spark-submit:
build:
context: ../../
dockerfile: docker/integ-test/spark-submit/Dockerfile
args:
FLINT_JAR: ${FLINT_JAR}
PPL_JAR: ${PPL_JAR}
SQL_APP_JAR: ${SQL_APP_JAR}
depends_on:
metastore:
condition: service_completed_successfully

opensearch:
image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
build: ./opensearch
container_name: opensearch
environment:
- cluster.name=opensearch-cluster
- node.name=opensearch
- discovery.seed_hosts=opensearch
- cluster.initial_cluster_manager_nodes=opensearch
- discovery.type=single-node
- bootstrap.memory_lock=true
- plugins.security.system_indices.enabled=false
- plugins.security.system_indices.permission.enabled=false
- plugins.security.ssl.http.enabled=false
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m}
- plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} -DEMR_SERVERLESS_CLIENT_FACTORY_CLASS=org.opensearch.sql.spark.client.DockerEMRServerlessClientFactory
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
ulimits:
memlock:
@@ -92,12 +137,18 @@ services:
soft: 65536
hard: 65536
volumes:
- opensearch-data:/usr/share/opensearch/data
- type: volume
source: opensearch-data
target: /usr/share/opensearch/data
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
ports:
- ${OPENSEARCH_PORT:-9200}:9200
- 9600:9600
expose:
- "${OPENSEARCH_PORT:-9200}"
- "9300"
healthcheck:
test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
interval: 1m
@@ -107,6 +158,9 @@ services:
start_interval: 5s
networks:
- opensearch-net
depends_on:
minio:
condition: service_healthy

opensearch-dashboards:
image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION}
@@ -119,8 +173,16 @@ services:
OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
networks:
- opensearch-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5601/"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
depends_on:
- opensearch
opensearch:
condition: service_healthy

minio:
image: minio/minio
@@ -132,12 +194,18 @@ services:
- "9001:9001"
volumes:
- minio-data:/data
healthcheck:
test: ["CMD", "curl", "-q", "-f", "http://localhost:9000/minio/health/live"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
networks:
- opensearch-net

volumes:
opensearch-data:
minio-data:

networks:
opensearch-net:
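Once the stack is up, the endpoints used by the healthchecks above can also be probed from the host. A hedged sketch, assuming the usual host port mappings (some of which sit in collapsed parts of this diff) and the admin password from .env:

# Mirror the compose healthchecks from the host.
export OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
curl -f http://localhost:8080/                                                          # Spark master UI
curl -f -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" http://localhost:9200/_cluster/health   # OpenSearch
curl -f http://localhost:5601/                                                          # OpenSearch Dashboards
curl -f http://localhost:9000/minio/health/live                                         # MinIO liveness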
23 changes: 23 additions & 0 deletions docker/integ-test/metastore/Dockerfile
Review comment (Member): missing license header
@@ -0,0 +1,23 @@
FROM openjdk:21-jdk-bookworm

WORKDIR /opt

ENV HADOOP_HOME=/opt/hadoop-3.3.4
ENV HIVE_HOME=/opt/apache-hive-2.3.9-bin

#RUN apt-get update
RUN curl -L https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz | tar zxf -
RUN curl -L https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz | tar zxf -
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-api-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-runtime-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.262.jar $HIVE_HOME/lib/

RUN groupadd -f -r hive --gid=1000
RUN useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive
RUN chown hive:hive -R ${HIVE_HOME}

WORKDIR $HIVE_HOME
EXPOSE 9083
ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"]
USER hive
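A hedged smoke test for this image, assuming it is built standalone from docker/integ-test/metastore; it only checks that the Hadoop and AWS jars copied above ended up in Hive's lib directory:

docker build -t integ-metastore docker/integ-test/metastore
docker run --rm --entrypoint ls integ-metastore /opt/apache-hive-2.3.9-bin/lib \
  | grep -E 'hadoop-client|hadoop-aws|aws-java-sdk-bundle'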
62 changes: 62 additions & 0 deletions docker/integ-test/metastore/hive-log4j2.properties
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

status = INFO
name = HiveLog4j2
packages = org.apache.hadoop.hive.ql.log

# list of properties
property.hive.log.level = INFO
property.hive.root.logger = console
property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
property.hive.log.file = hive.log
property.hive.perflogger.log.level = INFO

# list of all appenders
appenders = console

# console appender
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n

# list of all loggers
loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger

logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
logger.NIOServerCnxn.level = WARN

logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
logger.ClientCnxnSocketNIO.level = WARN

logger.DataNucleus.name = DataNucleus
logger.DataNucleus.level = ERROR

logger.Datastore.name = Datastore
logger.Datastore.level = ERROR

logger.JPOX.name = JPOX
logger.JPOX.level = ERROR

logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
logger.PerfLogger.level = ${sys:hive.perflogger.log.level}

# root logger
rootLogger.level = ${sys:hive.log.level}
rootLogger.appenderRefs = root
rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
53 changes: 53 additions & 0 deletions docker/integ-test/metastore/hive-site.xml
@@ -0,0 +1,53 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>file:///tmp</value>
<description></description>
</property>
<property>
<name>fs.default.name</name>
<value>file:///tmp</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
</property>
<property>
<name>datanucleus.schema.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>Vt7jnvi5BICr1rkfsheT</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>http://minio-S3:9000</value>
</property>
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>false</value>
</property>
</configuration>
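The fs.s3a.* settings point the metastore at the MinIO container on the Docker network; the access and secret keys are throwaway credentials for that local MinIO only. A hedged sketch of exercising the same endpoint from the host with the AWS CLI, assuming MinIO's S3 port 9000 is published to localhost and using a made-up bucket name:

export AWS_ACCESS_KEY_ID=Vt7jnvi5BICr1rkfsheT
export AWS_SECRET_ACCESS_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO
aws --endpoint-url http://localhost:9000 s3 mb s3://integ-test
aws --endpoint-url http://localhost:9000 s3 ls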
41 changes: 41 additions & 0 deletions docker/integ-test/opensearch/Dockerfile
Review comment (Member): missing license header
@@ -0,0 +1,41 @@
FROM opensearchproject/opensearch:latest

USER root

RUN mkdir /tmp/alter-emr-jar
WORKDIR /tmp/alter-emr-jar

ENV AWS_VERSION=1.12.651

RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-emrserverless/${AWS_VERSION}/aws-java-sdk-emrserverless-${AWS_VERSION}.jar
RUN curl -O -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/${AWS_VERSION}/aws-java-sdk-core-${AWS_VERSION}.jar

COPY emr-src /tmp/alter-emr-jar/emr-src
WORKDIR /tmp/alter-emr-jar/emr-src
RUN /usr/share/opensearch/jdk/bin/javac -cp ../aws-java-sdk-emrserverless-${AWS_VERSION}.jar:../aws-java-sdk-core-${AWS_VERSION}.jar com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.java org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
RUN mkdir /tmp/alter-emr-jar/extracted
WORKDIR /tmp/alter-emr-jar/extracted
RUN /usr/share/opensearch/jdk/bin/jar -xf ../aws-java-sdk-emrserverless-${AWS_VERSION}.jar
RUN cp ../emr-src/com/amazonaws/services/emrserverless/AWSEMRServerlessClientBuilder.class com/amazonaws/services/emrserverless/
RUN mkdir -p org/opensearch/spark/emrserverless
RUN cp ../emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.class org/opensearch/spark/emrserverless/
RUN /usr/share/opensearch/jdk/bin/jar -cfM /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar META-INF/MANIFEST.MF *
RUN chown opensearch:opensearch /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar
RUN rm -rf /tmp/alter-emr-jar

RUN yum install -y docker util-linux

COPY opensearch-docker-it-entrypoint.sh /usr/share/opensearch/opensearch-docker-it-entrypoint.sh
COPY docker-command-runner.sh /usr/share/opensearch/docker-command-runner.sh
COPY opensearch_security.policy /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy
COPY log4j2.properties /usr/share/opensearch/config/log4j2.properties

RUN chown opensearch:opensearch /usr/share/opensearch/config/opensearch-performance-analyzer/opensearch_security.policy
RUN chown opensearch:opensearch /usr/share/opensearch/config/log4j2.properties

WORKDIR /usr/share/opensearch
ENTRYPOINT ["./opensearch-docker-it-entrypoint.sh"]
CMD ["opensearch"]

EXPOSE 9200
EXPOSE 9300
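This image rebuilds the aws-java-sdk-emrserverless jar inside the opensearch-sql plugin with an overridden AWSEMRServerlessClientBuilder and a DockerEMRServerlessClient; combined with the docker CLI installed above and the /var/run/docker.sock bind mount in docker-compose.yml, the plugin can presumably launch Spark jobs as sibling containers instead of calling EMR Serverless. A hedged check that the patched jar contains the overridden classes, assuming the emr-src sources and entrypoint scripts are present in docker/integ-test/opensearch:

docker build -t integ-opensearch docker/integ-test/opensearch
docker run --rm --entrypoint bash integ-opensearch -c \
  '/usr/share/opensearch/jdk/bin/jar -tf /usr/share/opensearch/plugins/opensearch-sql/aws-java-sdk-emrserverless-*.jar | grep -E "AWSEMRServerlessClientBuilder|DockerEMRServerlessClient"'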