Commit b4a49cb

Ghislain Fourny committed Jul 10, 2024
2 parents e124a2f + 2ed0c2c
Showing 445 changed files with 74,438 additions and 3,481 deletions.
97 changes: 79 additions & 18 deletions .gitlab-ci.yml
@@ -1,46 +1,107 @@
 image: marioarduini/rumble-source:2020-11-23

-build-rumble:
+stages:
+  - build
+  - tests2
+  - tests3
+
+Build:
   stage: build
   artifacts:
     paths:
       - target/
   script:
-    - ant -buildfile build_antlr_parser.xml generate-parser -Dantlr.jar=lib/antlr-4.7-complete.jar
+    - ant -buildfile build_antlr_parser.xml generate-parser -Dantlr.jar=lib/antlr-4.9.3-complete.jar
     - mvn clean compile assembly:single

-javaapi-test:
-  stage: test
+SparkRuntimeTest:
+  stage: tests2
   script:
+    - mvn -Dtest=SparkRuntimeTests test
+
+SparkRuntimeTestsNativeDeactivated:
+  stage: tests2
+  script:
+    - mvn -Dtest=SparkRuntimeTestsNativeDeactivated test
+
+SparkRuntimeTestsDataFramesDeactivated:
+  stage: tests2
+  script:
+    - mvn -Dtest=SparkRuntimeTestsDataFramesDeactivated test
+
+SparkRuntimeTestsParallelismDeactivated:
+  stage: tests2
+  script:
+    - mvn -Dtest=SparkRuntimeTestsParallelismDeactivated test
+
+JavaAPITest:
+  stage: tests3
+  script:
     - mvn -Dtest=JavaAPITest test

-frontend-test:
-  stage: test
+FrontendTests:
+  stage: tests3
   script:
     - mvn -Dtest=FrontendTests test
-runtime-test:
-  stage: test
+
+RuntimeTests:
+  stage: tests3
   script:
     - mvn -Dtest=RuntimeTests test

-sparkruntime-test:
-  stage: test
+RuntimeTestsNoParallelism:
+  stage: tests3
   script:
-    - mvn -Dtest=SparkRuntimeTests test
+    - mvn -Dtest=RuntimeTestsNoParallelism test

-nativeflworruntime-test:
-  stage: test
+RuntimeTestsNoInlining:
+  stage: tests3
   script:
+    - mvn -Dtest=RuntimeTestsNoInlining test
+
+NativeFLWORRuntimeTests:
+  stage: tests3
+  script:
     - mvn -Dtest=NativeFLWORRuntimeTests test

-statictyping-test:
-  stage: test
+NativeFLWORRuntimeTestsNativeDeactivated:
+  stage: tests3
   script:
+    - mvn -Dtest=NativeFLWORRuntimeTestsNativeDeactivated test
+
+NativeFLWORRuntimeTestsDataFramesDeactivated:
+  stage: tests3
+  script:
+    - mvn -Dtest=NativeFLWORRuntimeTestsDataFramesDeactivated test
+
+NativeFLWORRuntimeTestsParallelismDeactivated:
+  stage: tests3
+  script:
+    - mvn -Dtest=NativeFLWORRuntimeTestsParallelismDeactivated test
+
+StaticTypingTest:
+  stage: tests3
+  script:
     - mvn -Dtest=StaticTypeTests test

-spotless-test:
-  stage: test
+SpotlessTest:
+  stage: tests3
   script:
     - mvn spotless:check
+
+MLTests:
+  stage: tests3
+  artifacts:
+    name: "ML Tests log"
+    paths:
+      - target/ml_test.log
+    when: always
+    expire_in: 2 days
+  script:
+    - mvn -Dtest=MLTests test --log-file target/ml_test.log
+
+MLTestsNativeDeactivated:
+  stage: tests3
+  script:
+    - mvn -Dtest=MLTestsNativeDeactivated test
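
Each CI job boils down to a single Maven invocation, so a failing job can usually be reproduced locally. A minimal sketch, assuming a checkout with the same JDK, Ant, and Maven as the CI image (the commands are taken verbatim from the file above; substitute any of the test-suite names for SparkRuntimeTests):

    # build step: regenerate the parser, then compile
    ant -buildfile build_antlr_parser.xml generate-parser -Dantlr.jar=lib/antlr-4.9.3-complete.jar
    mvn clean compile assembly:single
    # run one of the CI test suites
    mvn -Dtest=SparkRuntimeTests test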
19 changes: 19 additions & 0 deletions .readthedocs.yaml
@@ -0,0 +1,19 @@
+# Read the Docs configuration file for MkDocs projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+
+mkdocs:
+  configuration: mkdocs.yml
+
+# Optionally declare the Python requirements required to build your docs
+# python:
+#   install:
+#     - requirements: docs/requirements.txt
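
If the commented-out block were enabled, Read the Docs would install pinned documentation dependencies before building. A hypothetical docs/requirements.txt along these lines would pair with it (the pin below is illustrative, not taken from this commit):

    # docs/requirements.txt -- hypothetical; pin whatever MkDocs version the docs build against
    mkdocs==1.6.0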
2 changes: 1 addition & 1 deletion build_antlr_parser.xml
@@ -25,7 +25,7 @@
 <!-- <property name="src.dir" value="${basedir}/src/main/java/sparksoniq"/> prepends full path as comment-->
 <property name="src.dir" value="./src/main/java/org/rumbledb"/>
 <property name="parser.dir" value="${src.dir}/parser"/>
-<property name="antlr.jar" value="./lib/antlr-4.7-complete.jar"/>
+<property name="antlr.jar" value="./lib/antlr-4.9.3-complete.jar"/>


 <target name="clean-parser">
2 changes: 1 addition & 1 deletion build_xquery_antlr_parser.xml
@@ -24,7 +24,7 @@
 <!-- Set project properties. -->
 <property name="src.dir" value="./src/main/java/org/rumbledb"/>
 <property name="parser.dir" value="${src.dir}/parser"/>
-<property name="antlr.jar" value="/lib/antlr-4.7-complete.jar"/>
+<property name="antlr.jar" value="/lib/antlr-4.9.3-complete.jar"/>

 <target name="clean-xquery-parser">
     <delete file="/org/rumbledb/parser/XQueryParser.tokens"/>
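
By analogy with build_antlr_parser.xml, the XQuery grammar would be regenerated through this buildfile. The target name below is an assumption inferred from the clean-xquery-parser target visible above; it is not confirmed by this commit:

    # generate-xquery-parser is an assumed target name, by analogy with generate-parser
    ant -buildfile build_xquery_antlr_parser.xml generate-xquery-parser -Dantlr.jar=lib/antlr-4.9.3-complete.jar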
4 changes: 2 additions & 2 deletions docs/Docker.md
@@ -2,7 +2,7 @@

## Known issue

-On occasion, the docker version of RumbleDB used to throw a Kryo NoSuchMethodError on some systems. This should be fixed with version 1.21.0; let us know if this is not the case.
+On occasion, the docker version of RumbleDB used to throw a Kryo NoSuchMethodError on some systems. This should be fixed with version 1.22.0; let us know if this is not the case.

You can upgrade to the newest version with

@@ -29,7 +29,7 @@ The RumbleDB shell appears:
____ __ __ ____ ____
/ __ \__ ______ ___ / /_ / /__ / __ \/ __ )
/ /_/ / / / / __ `__ \/ __ \/ / _ \/ / / / __ | The distributed JSONiq engine
-/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.21.0 "Hawthorn blossom" beta
+/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.22.0 "Pyrenean oak" beta
/_/ |_|\__,_/_/ /_/ /_/_.___/_/\___/_____/_____/


6 changes: 3 additions & 3 deletions docs/FAQ.md
@@ -10,15 +10,15 @@ If you run RumbleDB with a standalone jar, then your laptop will allocate by def

In order to increase the memory, you can use `-Xmx10g` (for 10 GB, but you can use any other value):

-java -jar -Xmx10g rumbledb-1.21.0-standalone.jar ...
+java -jar -Xmx10g rumbledb-1.22.0-standalone.jar ...

If you run RumbleDB on your laptop (or a single machine) with the thin jar, then by default this is limited to around 2 GB, and you can change this with `--driver-memory`:

-spark-submit --driver-memory 10G rumbledb-1.21.0-for-spark-3.1.jar ...
+spark-submit --driver-memory 10G rumbledb-1.22.0-for-spark-3.1.jar ...

If you run RumbleDB on a cluster, then the memory needs to be allocated to the executors, not the driver:

-spark-submit --executor-memory 10G rumbledb-1.21.0-for-spark-3.1.jar ...
+spark-submit --executor-memory 10G rumbledb-1.22.0-for-spark-3.1.jar ...

Setting things up on a cluster requires more thinking because setting the executor memory should be done in conjunction with setting the total number of executors and the number of cores per executor. This highly depends on your cluster hardware.
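
As an illustration only, such a submission could combine those settings like this (the numbers are placeholders to be tuned to your hardware, and the jar name follows the examples above):

    # numbers are illustrative; tune executors, cores, and memory to your cluster
    spark-submit --num-executors 10 --executor-cores 4 --executor-memory 10G rumbledb-1.22.0-for-spark-3.1.jar ...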

12 changes: 6 additions & 6 deletions docs/Getting started.md
@@ -35,17 +35,17 @@ Do make sure it is not Java 17, which will not work.

RumbleDB is just a download and no installation is required.

-In order to run RumbleDB, you simply need to download rumbledb-1.21.0-standalone.jar from the [download page](https://github.com/RumbleDB/rumble/releases) and put it in a directory of your choice, for example, right besides your data.
+In order to run RumbleDB, you simply need to download rumbledb-1.22.0-standalone.jar from the [download page](https://github.com/RumbleDB/rumble/releases) and put it in a directory of your choice, for example, right besides your data.

Make sure to use the corresponding jar name in lieu of rumbledb.jar in all our instructions.

You can test that it works with:

-java -jar rumbledb-1.21.0-standalone.jar run -q '1+1'
+java -jar rumbledb-1.22.0-standalone.jar run -q '1+1'

or launch a JSONiq shell with:

-java -jar rumbledb-1.21.0-standalone.jar repl
+java -jar rumbledb-1.22.0-standalone.jar repl

If you run out of memory, you can allocate more memory to Java with an additional Java parameter, e.g., -Xmx10g.
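
For instance, following the same pattern as in the FAQ (the jar name assumes the standalone download above):

    java -Xmx10g -jar rumbledb-1.22.0-standalone.jar repl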

@@ -111,9 +111,9 @@ Like Spark, RumbleDB is just a download and no installation is required.

In order to run RumbleDB, you simply need to download one of the small .jar files from the [download page](https://github.com/RumbleDB/rumble/releases) and put it in a directory of your choice, for example, right besides your data.

-If you use Spark 3.2+, use rumbledb-1.21.0-for-spark-3.2.jar.
+If you use Spark 3.2+, use rumbledb-1.22.0-for-spark-3.2.jar.

-If you use Spark 3.3+, use rumbledb-1.21.0-for-spark-3.3.jar.
+If you use Spark 3.3+, use rumbledb-1.22.0-for-spark-3.3.jar.

These jars do not embed Spark, since you chose to set it up separately. They will work with your Spark installation with the spark-submit command.
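
For instance, a one-off query could be run through spark-submit like this (a sketch; pick the jar matching your Spark version, and the query is the same sanity check used earlier on this page):

    spark-submit rumbledb-1.22.0-for-spark-3.3.jar run -q '1+1'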

@@ -158,7 +158,7 @@ The RumbleDB shell appears:
____ __ __ ____ ____
/ __ \__ ______ ___ / /_ / /__ / __ \/ __ )
/ /_/ / / / / __ `__ \/ __ \/ / _ \/ / / / __ | The distributed JSONiq engine
-/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.21.0 "Hawthorn blossom" beta
+/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.22.0 "Pyrenean oak" beta
/_/ |_|\__,_/_/ /_/ /_/_.___/_/\___/_____/_____/


8 changes: 4 additions & 4 deletions docs/HTTPServer.md
@@ -4,7 +4,7 @@

RumbleDB can be run as an HTTP server that listens for queries. In order to do so, you can use the --server and --port parameters:

-spark-submit rumbledb-1.21.0.jar serve -p 8001
+spark-submit rumbledb-1.22.0.jar serve -p 8001

This command will not return until you force it to (Ctrl+C on Linux and Mac). This is because the server has to run permanently to listen to incoming requests.
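
A quick smoke test from another terminal is an HTTP request against the /jsoniq endpoint mentioned below. Note that passing the query as the POST body is our assumption here, not something stated on this page:

    # assumes the server accepts the JSONiq query as the raw POST body
    curl -X POST --data '1+1' http://localhost:8001/jsoniq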

@@ -69,19 +69,19 @@ Then there are two options
- Connect to the master with SSH with an extra parameter for securely tunneling the HTTP connection (for example `-L 8001:localhost:8001` or any port of your choosing)
- Download the RumbleDB jar to the master node

-wget https://github.com/RumbleDB/rumble/releases/download/v1.21.0/rumbledb-1.21.0.jar
+wget https://github.com/RumbleDB/rumble/releases/download/v1.22.0/rumbledb-1.22.0.jar

- Launch the HTTP server on the master node (it will be accessible under `http://localhost:8001/jsoniq`).

-spark-submit rumbledb-1.21.0.jar serve -p 8001
+spark-submit rumbledb-1.22.0.jar serve -p 8001

- And then use Jupyter notebooks in the same way you would do it locally (it magically works because of the tunneling)

### With the EC2 hostname

There is also another way that does not need any tunneling: you can specify the hostname of your EC2 machine (copied over from the EC2 dashboard) with the --host parameter. For example, with the placeholder <ec2-hostname>:

-spark-submit rumbledb-1.21.0.jar serve -p 8001 -h <ec2-hostname>
+spark-submit rumbledb-1.22.0.jar serve -p 8001 -h <ec2-hostname>

You also need to make sure in your EMR security group that the chosen port (e.g., 8001) is accessible from the machine in which you run your Jupyter notebook. Then, you can point your Jupyter notebook on this machine to `http://<ec2-hostname>:8001/jsoniq`.

4 changes: 2 additions & 2 deletions docs/install.md
@@ -64,7 +64,7 @@ After successful completion, you can check the `target` directory, which should

The most straightforward way to test whether the above steps were successful is to run the RumbleDB shell locally, like so:

-$ spark-submit target/rumbledb-1.21.0.jar repl
+$ spark-submit target/rumbledb-1.22.0.jar repl

The RumbleDB shell should start:

@@ -73,7 +73,7 @@ The RumbleDB shell should start:
____ __ __ ____ ____
/ __ \__ ______ ___ / /_ / /__ / __ \/ __ )
/ /_/ / / / / __ `__ \/ __ \/ / _ \/ / / / __ | The distributed JSONiq engine
-/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.21.0 "Hawthorn blossom" beta
+/ _, _/ /_/ / / / / / / /_/ / / __/ /_/ / /_/ / 1.22.0 "Pyrenean oak" beta
/_/ |_|\__,_/_/ /_/ /_/_.___/_/\___/_____/_____/

Master: local[2]
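
From there, the same sanity-check query used earlier confirms the build works end to end; the prompt shown is illustrative:

    rumble$ 1+1
    2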