Merge pull request #169 from julienrf/update-ansible-playbook
Update Ansible playbook
tarzanek authored Jul 24, 2024
2 parents 8389e93 + b123c9e commit 169479c
Showing 5 changed files with 39 additions and 61 deletions.
57 changes: 13 additions & 44 deletions ansible/scylla-migrator.yml
@@ -19,8 +19,8 @@
     become: true
     package: name={{ item }} state=present
     with_items:
-      - openjdk-8-jre
-      - openjdk-8-jdk
+      - openjdk-17-jre
+      - openjdk-17-jdk
       - unzip
       - python3-pip
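The hunk above moves the cluster from OpenJDK 8 to OpenJDK 17, which the later `JAVA_HOME` tasks depend on. A quick sanity check one might run on a provisioned node (a sketch, not part of the playbook; the path assumes the standard Ubuntu amd64 package layout):

```shell
# Verify the JDK 17 install path that the playbook's JAVA_HOME tasks point at.
# Assumption: Ubuntu amd64 package layout.
JVM_DIR=/usr/lib/jvm/java-17-openjdk-amd64
if [ -d "$JVM_DIR" ]; then
  "$JVM_DIR/bin/java" -version
else
  echo "JDK 17 not found at $JVM_DIR" >&2
fi
```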

@@ -64,32 +64,6 @@
       state: absent
       path: "{{ home_dir }}/aws"

-  - name: Download sbt
-    ansible.builtin.get_url:
-      url: https://github.com/coursier/coursier/releases/latest/download/cs-x86_64-pc-linux.gz
-      dest: "{{ home_dir }}"
-
-  - name: unarchive sbt
-    shell: gzip -d cs-x86_64-pc-linux.gz
-
-  - name: Delete /tmp/cs-x86_64-pc-linux.gz
-    ansible.builtin.file:
-      state: absent
-      path: "{{ home_dir }}/cs-x86_64-pc-linux.gz"
-
-  - name: rename cs-x86_64-pc-linux to cs
-    become: yes
-    shell: mv cs-x86_64-pc-linux cs
-
-  - name: Change permission on sbt
-    file:
-      path: cs
-      state: file
-      mode: 0755
-
-  - name: setup sbt
-    shell: "{{ home_dir }}/cs setup -y"
-
   - name: Download spark
     get_url:
       url: https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz
@@ -115,7 +89,7 @@
     ansible.builtin.lineinfile:
       path: "{{ home_dir }}/.profile"
       regexp: '^JAVA_HOME='
-      line: export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+      line: export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64

   - name: Add JAVA_HOME to PATH
     ansible.builtin.lineinfile:
@@ -180,15 +154,15 @@
     home_dir: /home/ubuntu
     spark_home: /opt/spark
   environment:
-    JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64
+    JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64
     SPARK_HOME: /opt/spark
-    PATH: "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ubuntu/.local/share/coursier/bin:/usr/lib/jvm/java-8-openjdk-amd64:/opt/spark/bin:/opt/spark/sbin"
+    PATH: "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ubuntu/.local/share/coursier/bin:/usr/lib/jvm/java-17-openjdk-amd64:/opt/spark/bin:/opt/spark/sbin"

   tasks:
-    - name: Clone scylla-migrator repo
-      ansible.builtin.git:
-        repo: https://github.com/scylladb/scylla-migrator.git
-        dest: "{{ home_dir }}/scylla-migrator"
+    - name: Create scylla-migrator directory
+      ansible.builtin.file:
+        path: "{{ home_dir }}/scylla-migrator"
+        state: directory

     - name: copy master start/stop convenience scripts
       copy:
@@ -225,13 +199,8 @@
         - spark-env
         - config.dynamodb.yml

-    - name: build scylla-migrator
-      shell: "./build.sh"
-      register: build
-      args:
-        chdir: "{{ home_dir }}/scylla-migrator"
-
-    - debug: msg="{{ build.stdout }}"
-
-    - debug: msg="{{ build.stderr }}"
+    - name: download scylla-migrator
+      get_url:
+        url: https://github.com/scylladb/scylla-migrator/releases/latest/download/scylla-migrator-assembly.jar
+        dest: "{{ home_dir }}/scylla-migrator/scylla-migrator-assembly.jar"
...
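The replacement task above fetches a prebuilt `scylla-migrator-assembly.jar` from the project's GitHub releases instead of compiling it with sbt. A rough shell equivalent of that `get_url` task, for doing the same thing by hand on a node (`HOME_DIR` stands in for the playbook's `home_dir` variable; the download itself is left commented out):

```shell
# Manual equivalent of the playbook's "download scylla-migrator" task (a sketch;
# the playbook itself uses ansible.builtin.get_url).
HOME_DIR=/home/ubuntu
JAR_URL=https://github.com/scylladb/scylla-migrator/releases/latest/download/scylla-migrator-assembly.jar
DEST="$HOME_DIR/scylla-migrator/scylla-migrator-assembly.jar"
echo "fetching $JAR_URL -> $DEST"
# mkdir -p "$HOME_DIR/scylla-migrator" && curl -L --fail -o "$DEST" "$JAR_URL"  # uncomment to download
```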
2 changes: 1 addition & 1 deletion ansible/templates/submit-alternator-job.sh
@@ -14,4 +14,4 @@ time spark-submit --class com.scylladb.migrator.Migrator \
   --conf spark.scylla.config=/home/ubuntu/scylla-migrator/config.dynamodb.yml \
   --conf spark.executor.memory=$MEMORY \
   --conf spark.driver.memory=64G \
-  /home/ubuntu/scylla-migrator/migrator/target/scala-2.13/scylla-migrator-assembly.jar
+  /home/ubuntu/scylla-migrator/scylla-migrator-assembly.jar
2 changes: 1 addition & 1 deletion ansible/templates/submit-cql-job-validator.sh
@@ -15,4 +15,4 @@ time spark-submit --class com.scylladb.migrator.Validator \
   --num-executors $SPARK_WORKER_INSTANCES \
   --executor-memory $MEMORY \
   --conf spark.cassandra.connection.localConnectionsPerExecutor=4 \
-  migrator/target/scala-2.13/scylla-migrator-assembly.jar
+  /home/ubuntu/scylla-migrator/scylla-migrator-assembly.jar
4 changes: 2 additions & 2 deletions ansible/templates/submit-cql-job.sh
@@ -15,7 +15,7 @@ time spark-submit --class com.scylladb.migrator.Migrator \
   --num-executors $SPARK_WORKER_INSTANCES \
   --executor-memory $MEMORY \
   --conf spark.cassandra.connection.localConnectionsPerExecutor=4 \
-  migrator/target/scala-2.13/scylla-migrator-assembly.jar
+  /home/ubuntu/scylla-migrator/scylla-migrator-assembly.jar

#sometimes you will need a tuning for driver memory size
#add this config to above to tune it:
@@ -32,6 +32,6 @@ time spark-submit --class com.scylladb.migrator.Migrator \
 # --executor-memory $MEMORY \
 # --conf "spark.executor.extraJavaOptions=-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=64000 -XX:+HeapDumpOnOutOfMemoryError" \
 # --conf spark.cassandra.connection.localConnectionsPerExecutor=4 \
-# migrator/target/scala-2.13/scylla-migrator-assembly.jar
+# /home/ubuntu/scylla-migrator/scylla-migrator-assembly.jar

#-XX:+HeapDumpOnOutOfMemoryError -XX:+PrintGCDetails
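All three submit scripts in this commit now point at the single downloaded jar rather than the sbt build output under `migrator/target/`. A small pre-flight check along these lines (a sketch, not part of the repo's scripts) can catch a missing jar before `spark-submit` fails with a less obvious error:

```shell
# Pre-flight sketch: confirm the assembly jar the submit scripts reference
# exists before invoking spark-submit. Path matches the playbook's download task.
JAR=/home/ubuntu/scylla-migrator/scylla-migrator-assembly.jar
if [ ! -f "$JAR" ]; then
  echo "missing $JAR; run the Ansible playbook or download the release jar first" >&2
fi
```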
35 changes: 22 additions & 13 deletions docs/source/getting-started/ansible.rst
@@ -2,46 +2,55 @@
 Set Up a Spark Cluster with Ansible
 ===================================

-An `Ansible <https://www.ansible.com/>`_ playbook is provided in the `ansible folder <https://github.com/scylladb/scylla-migrator/tree/master/ansible>`_ of our Git repository. The Ansible playbook will install the pre-requisites, Spark, on the master and workers added to the ``ansible/inventory/hosts`` file. Scylla-migrator will be installed on the spark master node.
+An `Ansible <https://www.ansible.com/>`_ playbook is provided in the `ansible folder <https://github.com/scylladb/scylla-migrator/tree/master/ansible>`_ of our Git repository. The Ansible playbook installs the prerequisites and Spark on the master and workers listed in the ``ansible/inventory/hosts`` file. Scylla-migrator will be installed on the Spark master node.

-1. Update ``ansible/inventory/hosts`` file with master and worker instances
-2. Update ``ansible/ansible.cfg`` with location of private key if necessary
-3. The ``ansible/template/spark-env-master-sample`` and ``ansible/template/spark-env-worker-sample`` contain environment variables determining number of workers, CPUs per worker, and memory allocations - as well as considerations for setting them.
-4. run ``ansible-playbook scylla-migrator.yml``
-5. On the Spark master node:
+The Ansible playbook expects to be run in an Ubuntu environment where the directory ``/home/ubuntu`` already exists.
+
+1. Clone the Migrator Git repository:
+
+   .. code-block:: bash
+
+      git clone https://github.com/scylladb/scylla-migrator.git
+      cd scylla-migrator/ansible
+
+2. Update the ``ansible/inventory/hosts`` file with master and worker instances
+3. Update ``ansible/ansible.cfg`` with the location of the private key if necessary
+4. The ``ansible/template/spark-env-master-sample`` and ``ansible/template/spark-env-worker-sample`` contain environment variables determining the number of workers, CPUs per worker, and memory allocations, as well as considerations for setting them.
+5. Run ``ansible-playbook scylla-migrator.yml``
+6. On the Spark master node:

    .. code-block:: bash

       cd scylla-migrator
       ./start-spark.sh

-6. On the Spark worker nodes:
+7. On the Spark worker nodes:

    .. code-block:: bash

       ./start-slave.sh

-7. Open Spark web console
+8. Open the Spark web console

    - Ensure networking is configured to allow you to access the Spark master node via TCP ports 8080 and 4040
    - Visit ``http://<spark-master-hostname>:8080``

-8. `Review and modify config.yaml <../#configure-the-migration>`_ based whether you're performing a migration to CQL or Alternator
+9. `Review and modify config.yaml <../#configure-the-migration>`_ based on whether you're performing a migration to CQL or Alternator

    - If you're migrating to the ScyllaDB CQL interface (from Apache Cassandra, ScyllaDB, or another CQL source), make a copy of ``config.yaml.example``, review its comments, and edit as directed.
    - If you're migrating to Alternator (from DynamoDB or another ScyllaDB Alternator), make a copy of ``config.dynamodb.yml``, review its comments, and edit as directed.

-9. As part of ansible deployment, sample submit jobs were created. You may edit and use the submit jobs.
+10. As part of the Ansible deployment, sample submit jobs were created. You may edit and use them.

     - For a CQL migration: edit ``scylla-migrator/submit-cql-job.sh``, changing the line ``--conf spark.scylla.config=config.yaml \`` to point to whatever you named the ``config.yaml`` in the previous step.
     - For an Alternator migration: edit ``scylla-migrator/submit-alternator-job.sh``, changing the line ``--conf spark.scylla.config=/home/ubuntu/scylla-migrator/config.dynamodb.yml \`` to reference the ``config.yaml`` file you created and modified in the previous step.

-10. Ensure the table has been created in the target environment.
-11. Submit the migration by submitting the appropriate job
+11. Ensure the table has been created in the target environment.
+12. Submit the migration by running the appropriate job

     - CQL migration: ``./submit-cql-job.sh``
     - Alternator migration: ``./submit-alternator-job.sh``

-12. You can monitor progress by observing the Spark web console you opened in step 7. Additionally, after the job has started, you can track progress via ``http://<spark-master-hostname>:4040``.
+13. You can monitor progress by observing the Spark web console you opened in step 8. Additionally, after the job has started, you can track progress via ``http://<spark-master-hostname>:4040``.

 FYI: when no Spark jobs are actively running, the Spark progress page at port 4040 is unavailable; it only renders while a Spark job is in progress.
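Since the docs lean on the master's web consoles on ports 8080 (cluster) and 4040 (running job) for monitoring, a quick reachability probe can rule out firewall problems early. A sketch, where ``SPARK_MASTER`` is an assumed placeholder hostname rather than anything the playbook defines, and the actual probe is left commented out:

```shell
# Reachability sketch for the Spark web consoles described above.
# SPARK_MASTER is an assumed placeholder; substitute your master's hostname.
SPARK_MASTER=spark-master.example.com
for port in 8080 4040; do
  echo "would probe http://$SPARK_MASTER:$port"
  # curl -sf "http://$SPARK_MASTER:$port" >/dev/null || echo "port $port unreachable" >&2
done
```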
