#cd /root/
#cp spark/scripts/config/config-template.yml spark/scripts/config/config.yml
Set the correct internal SUP password, as well as the supervisor IP if the script is intended to run from a remote location:
sup:
  url: "https://127.0.0.1:2443"
  login: "root"
  password: ""
#mkdir -p /fs/spark/
#/root/spark_env/bin/python /root/spark/scripts/listkey.py DATA
#ls /fs/spark/listkeys-DATA.csv/*
#/root/spark_env/bin/python /root/spark/scripts/count-flag.py DATA
Output:
+---+----------+
|_c3|count(_c1)|
+---+----------+
| 16|   4403504|
| 48|       252|
| 32|    177258|
|  0| 253136044|
+---+----------+
#/root/spark_env/bin/python /root/spark/scripts/count-flag-uniq.py DATA
Output:
+---+-------------------+
|_c3|count(DISTINCT _c1)|
+---+-------------------+
| 16|            1046962|
| 48|                 84|
| 32|              59085|
|  0|           61031785|
+---+-------------------+