From 83cf3d0b8135586912c9d21e5e1f7eced7dcc7e6 Mon Sep 17 00:00:00 2001 From: Esteban Miccio Date: Fri, 22 Mar 2024 15:56:37 -0300 Subject: [PATCH 1/3] Adding hostPath option and updating bytewax version --- README.md | 6 ++++-- charts/bytewax/Chart.yaml | 4 ++-- charts/bytewax/examples.tar | Bin 2617 -> 2597 bytes charts/bytewax/examples/k8s_basic.py | 2 +- charts/bytewax/examples/wikistream.py | 15 ++++++++------- charts/bytewax/templates/job.yaml | 10 ++++++++-- charts/bytewax/templates/pvc.yaml | 2 +- charts/bytewax/templates/statefulset.yaml | 12 ++++++++++++ charts/bytewax/values.yaml | 7 +++++-- 9 files changed, 41 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3d2bf54..5011bff 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,8 @@ The command removes all the Kubernetes components associated with the chart and | Parameter | Description | Default | |-------------------------------------------|-----------------------------------------------|---------------------------------------------------------| -| `image.repository` | Image repository | `bytewax.docker.scarf.sh/bytewax/bytewax` | -| `image.tag` | Image tag | `0.18.1-python3.9` | +| `image.repository` | Image repository | `bytewax/bytewax` | +| `image.tag` | Image tag | `0.19.0-python3.9` | | `image.pullPolicy` | Image pull policy | `Always` | | `imagePullSecrets` | Image pull secrets | `[]` | | `serviceAccount.create` | Create service account | `true` | @@ -92,6 +92,8 @@ The command removes all the Kubernetes components associated with the chart and | `configuration.recovery.persistence.finalizers` | PersistentVolumeClaim finalizers | `[ "kubernetes.io/pvc-protection" ]` | | `configuration.recovery.persistence.extraPvcLabels` | Extra labels to apply to the PVC | `{}` | | `configuration.recovery.persistence.storageClassName` | Type of persistent volume claim | `nil` | +| `configuration.recovery.persistence.hostPath.enabled` | Use hostPath instead of PersistentVolumeClaim | `false` | +| `configuration.recovery.persistence.hostPath.path` | Absolute path on the host to store recovery files | `` | | `customOtlpUrl` | OTLP Endpoint URL | `` | | `opentelemetry-collector.enabled` | Install OpenTelemetry Collector Helm Chart | `false` | | `jaeger.enabled` | Install Jaeger Helm Chart | `false` | diff --git a/charts/bytewax/Chart.yaml b/charts/bytewax/Chart.yaml index 4a0b308..34df56a 100644 --- a/charts/bytewax/Chart.yaml +++ b/charts/bytewax/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: bytewax description: A Helm chart for Bytewax type: application -version: 0.8.0 -appVersion: "0.18.1" +version: 0.8.1 +appVersion: "0.18.2" icon: https://bytewax.io/assets/img/favicon.png dependencies: - condition: opentelemetry-collector.enabled diff --git a/charts/bytewax/examples.tar b/charts/bytewax/examples.tar index a3e7e4307057fa73d7ccd5762879512241598878..00d054451024930489471c9d30c6e835e4b87c95 100644 GIT binary patch literal 2597 zcmV+=3flD_iwFP!000001MM1XZ{xTzpZzNcZ-JFtS+@La1EgQ}_KLoC54{Bz#Wv8< zGHtV#MM0!u`-1%U&XANW`4K0$jdQovAP6OLI2_J{GegoHp{e4Ei?}`X)JFZn zHBiB$(crZHNBjC8k9(m1?pN928TdcfzwgfY9rGJK-S!%N-S!&;-3}T<-3}Wg-HsZs z&2J4~7tZ;A?-KqGIp2Z*_iF#Y1o*C63v_>I(EFi5=Z6M;9~yLhXll>@HsPHA4_+(& z_iO*Z1VH>3S*qhdv^xGntK&bkI{rhe<3F_ZDe$eqIsflp#Q%qf{y(n${}SN4ik0KVfXPH~GDs82%XHd>9wl+B0|fyyATIh)qCMoF2<(wLae6-^we zQkQHznM7PUZ{4@87EINfyeanQG)wN$Y&CeC_%JLI51umRG@)s_1X0}`6AntyX&#NO zO^Z0x~J6;nJS@a)(u+EEj9>Pi3f<99cX>PfX2-QR4`*1 z2wfBPs;x0kRR(9(Gm6Nb|rJ-;k$U;~TPX zk1g9*wZD9uPO*blfoO^6CROU)S&;24X}(#Kn6uleONPg+208uA2u&H83m(s5$0U=C ze5RXFMK0NjjD$2MQ?ZUCL8pW(qrZhxiM;M~ay@d<$#KxI{*{4<77QqbVzr|26cigH zwUmIAS3IT>E|mfrC06!CfWX%RX2go}n5FU>+4~*AbH4qKcpcIX?`Ao=huv(WwJbH> zS~E=wTb;5b5?fLzSI_v5)nmII(-k9dO0@I!cSFfmEg*+RK^e4soxPyDn*zr|yFF!- zY!2;`v7}8Se#dT9nlb3v0*iq9o`wsu7U>dJTk|LaYXX^qpolg87?8Y9vDz^k{yir4 zWWC@(YC-Q9i9l8o{x2C!{^{d~_x?{FF7|1`wq_MrASM%`7O+gr<${1hP#JGwOfp8Q z=8rW*&;ySz|HS7w_ zi>8)K!;mcuqAD$|6P0u)>dA_xf~C{)qA2;EZ`kyQJOs0;tz2OZko7E!%zJ_0k_lDeBA{{` zhgG@MQg0-&RMIv0j6Y-4CQ4OmJBuw+q8qp=D{U=itw*VQuCa=pCEjBu5v6H2d zt)9)OPQ6O9(+H>=sb3B14t*vdwZ7A0j}K4~wwyr%qftD|Mi@bP0o*NlApqxuV7};@ z=>v%H)^e%24JLEuOh%~~avU_($z?KaoUiw}GH;Xw!%&Z#HTdFQTY`#UKMfxmD; z<2i$xE~oc~`~`|W$R@J{bF8k{8$zNLV)+5v_TgZIDu=D4SIj_$yg)0HVbNGptAiYs zTnv~8OPtCZBn!1Vl7liU-($=+$`CTUB<(+Ize9=K?zU`;OvH97iImvi5)l>CMydz;rz^5*n zSg>SUmjo%3*@^`cZRL=3)u6CqJ-F%F1wwPtbMl_2wEb2IQ{T~GlRtvT3JPwq& z$%BZ~>-PF!&H7LM%;TB3$zzF^snB?rU2;znc(eD0ICi_9-mXi`#^4yVn`>DyV(lEP z=*Dmg4@Mr>MPT~EV|D9-Z>;Ui96`scFV*7_b*XL}+C9wp9vS>^^b@R#IY?U*1tsjS zW5hn&aYne@^E{ja!*=p+7K`LjEZl8DuC%%lDg@3HxO8UCzd_1F=0cJC0_DLa0uExN zvxd|Bz993U>F5FA@7^x zd;1ry$M0D$kpFod$?9>92WD7h)KC$M?(T>=VH#8 z7c(Cqvt{{*+f#E-)t5TdyY^cQ3{2UkRD0-}t2~C;c!DnNg^`eKf6W90Se2vs3|Z02 zqq5tVR*}l?6@PX{5aV%PX1;hcx$a@%T2?R6y~&rUlDVm<@Wk%`^fB3NJsxYya)7e^ zR~}vR=pRV@0Q@Sy|LKl;{r%tnjK}rw|1JW*sr;{Udh5$8paL!%7C-VS9fHG3jk+O! zApWBXv84Z{BbO#V>F H08jt`xqluh literal 2617 zcmV-93dZ#xiwFP!000001MOSukK4Er-_QOl2&X{gR+cS4+Xc2?a=AkvNDjRKL6HVJ zTBdDvWl0d}wS9~I-}@U!*03F z;ldpZhofG9G#>RYx~|(D^)A@(`B}TTSV_fG#x8_ZV#1@7cn^(#qu_KTUnZgOrd;u7 zvW3U#e=v5(J=b-|(0{+*>(=@|2UPX%hY*KIzpx!VUH?O1)cQXM9M^xsQx&Knj@m;< zZPf2vgA_a(4NmKSw6Fj1*oFSPUu1_5!2h}aU3(VX2)EJGZLiVSZND+l?VvH#?XWS@ z?Wpn6{MPVg;UWL;ox}f!y#xR6j%)uv2e`Ic$K?KK$otWd^P?f(M?S@Wd>VD6ng(%B z>=!kCzf!*h(e+9ESr7`D<68{|_#sGXie0>*KJkU!Xf+yBF=HzB)CvciwP{^ylvJoy z8Zpzk^UadPtld^^HauoE#OX)Dcq-UD z4x%}B%vMsck9^~+&}Q3s#e5#IX}pfYm`_=tjQ$o%CGxV<$@Lh-og9Y@>t8E~Xdyt! zkC#gxO`+I`sHF^2UIq~lX{i)woMp0SF$BJjF(Xnuh(szciM>BCiu3I^%;~VUe>2O` z9qJYvt!1h4)|zQn*y>ayVZ3F9a`lY=u;+%%P}=Q?F9kywYUdmL!fO``C0n&X%1?SV zi{@h4%Ml*MKu#5buHBxB$!d=FT8O00!{A0-sdOdKvxU%s`j-0(wvN+lY;PTeA^ZbY z0@bt3og*MQor0lbUin8VRoHqFfYgHD2o^&75`V7+Cja{W-COrphZg&=5L>ef7Kqp+ zRtqc>1#-b)Vq{*gF(!pzRfndk$m@atVI3KAK@_BjYhnx!TrojAOVZdEQX4NuK_OS= zI*pasO*0&8PxZMJQ^RhodC}BzX&ADFi5Cu*9yp9P{_E95kWGt?@L3zqS2Cw|7yghl z*lvT2Vx1eF=015`1ou*OpGwgsCjU))h*u$^!;;UjEo}r8O?0T}Kv<|rO>mI{^EgEJ z4?er*m|Rq>5{FfvO~QDBC!u_Z@TGICuNkUl{5)$s;rCxdcckmWi#NLYBHV9DvXo z&S)OQltH$o6gYeW5h=q_*6*R;GyRTfawgy2;KdYczpq9;S|ei$qggN~H7qAcFXkUL zA~JSOPM0TIOnfdPYn6sswQNRpVp5WoMj%~C{c2Em=(8B9^oGegtJzOC2R?ag?$hB4pWguOrH$)oZQ@Md{y9i{cYQ$1H z#SCJ|@wD;;6^SJ^TEtPwrNBHaZz`{dENXKihq5Z)VhlHyJMiuM9P0#KiG#5Q5 z?|Dj_Zgalw%IsU#lp?)BxUDd4ShxPCehZ>me3hRi&Pw^l+p;D1BtbWO zub7o>*U{Uxnb{b^V79qdCPu0>y(QmR_|J&qvN)4I1(Et}!ztGJ2;E|k;p?-ChDmuU z^P^zK3L-3!d)`4*Fbp{t^z@hr=X=gg3(3KAs|8f%tW4ED&Zjm6|Lh1e@LX0wnl zCsTf@MPnDTKdLZV@laiG<0V&K6u_qlQS)cSS2lsY*(CG~xxCX^^R_ez!nv(ZyK7?A z>eZ@QKVxrSAa59});$`WjtuB3<+*Zyf``8U4)ph5%Kt}$-jMSD-ndu) z{^Kk_w`zq2Jn3tWf<#Jb{|EDWET?{4j{Jzs`|!**tkX?<;m^AR+bvG)3UgLUnVt2Q z3|a?YJ-%_5eiE4PzY{J4|Jg#o1OD%hdZXO`$KA2{|G(c<u6>vMW;Ny+W{31D2UD-b_w?Sj3i<4Z2tPGF38H z6{(*1Re%nf&DQ-VPgxEq>c6_PCO`c>Ngse; List[str]: + def next_batch(self) -> List[str]: return next(self._batcher) def snapshot(self) -> None: @@ -36,7 +36,7 @@ class WikiSource(FixedPartitionedSource[str, None]): def list_parts(self): return ["single-part"] - def build_part(self, _now, _for_key, _resume_state): + def build_part(self, step_id, for_key, _resume_state): return WikiPartition() @@ -62,7 +62,10 @@ def get_server_name(data_dict): # ("server.name", count_per_window) -def keep_max(max_count: Optional[int], new_count: int) -> Tuple[int, int]: +def keep_max( + max_count: Optional[int], new_window_count: Tuple[WindowMetadata, int] +) -> Tuple[Optional[int], int]: + _metadata, new_count = new_window_count if max_count is None: new_max = new_count else: @@ -71,9 +74,7 @@ def keep_max(max_count: Optional[int], new_count: int) -> Tuple[int, int]: return (new_max, new_max) -max_count_per_window = op.stateful_map( - "keep_max", server_counts, lambda: None, keep_max -) +max_count_per_window = op.stateful_map("keep_max", server_counts, keep_max) # ("server.name", max_per_window) diff --git a/charts/bytewax/templates/job.yaml b/charts/bytewax/templates/job.yaml index 9a96430..d27e13b 100644 --- a/charts/bytewax/templates/job.yaml +++ b/charts/bytewax/templates/job.yaml @@ -159,9 +159,9 @@ spec: - name: BYTEWAX_RECOVERY_DIRECTORY value: /var/recovery - name: BYTEWAX_SNAPSHOT_INTERVAL - value: {{ .Values.configuration.recovery.snapshotInterval }} + value: "{{ .Values.configuration.recovery.snapshotInterval }}" - name: BYTEWAX_RECOVERY_BACKUP_INTERVAL - value: {{ .Values.configuration.recovery.backupInterval }} + value: "{{ .Values.configuration.recovery.backupInterval }}" {{- end }} {{- range $key, $value := .Values.env }} - name: "{{ tpl $key $ }}" @@ -236,7 +236,13 @@ spec: {{- end }} {{- if .Values.configuration.recovery.enabled }} - name: recovery +{{- if .Values.configuration.recovery.persistence.hostPath.enabled }} + hostPath: + path: {{ .Values.configuration.recovery.persistence.hostPath.path | quote }} + type: DirectoryOrCreate +{{- else }} persistentVolumeClaim: claimName: {{ .Release.Name }}-recovery {{- end }} +{{- end }} {{- end }} \ No newline at end of file diff --git a/charts/bytewax/templates/pvc.yaml b/charts/bytewax/templates/pvc.yaml index 6108dfa..48ee5a0 100644 --- a/charts/bytewax/templates/pvc.yaml +++ b/charts/bytewax/templates/pvc.yaml @@ -1,4 +1,4 @@ -{{- if .Values.configuration.recovery.enabled}} +{{- if and .Values.configuration.recovery.enabled (not .Values.configuration.recovery.persistence.hostPath.enabled) }} apiVersion: v1 kind: PersistentVolumeClaim metadata: diff --git a/charts/bytewax/templates/statefulset.yaml b/charts/bytewax/templates/statefulset.yaml index 73cee52..e24223f 100644 --- a/charts/bytewax/templates/statefulset.yaml +++ b/charts/bytewax/templates/statefulset.yaml @@ -34,8 +34,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "bytewax.serviceAccountName" . }} +{{- if not .Values.configuration.recovery.persistence.hostPath.enabled }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} +{{- end }} {{- if .Values.nodeSelector }} nodeSelector: {{ toYaml .Values.nodeSelector | indent 8 }} @@ -45,6 +47,7 @@ spec: initContainers: {{- if .Values.configuration.recovery.enabled }} - name: init-recovery +{{- if not .Values.configuration.recovery.persistence.hostPath.enabled }} securityContext: allowPrivilegeEscalation: false capabilities: @@ -53,6 +56,7 @@ spec: drop: - ALL readOnlyRootFilesystem: true +{{- end }} env: - name: BYTEWAX_RECOVERY_PARTS value: "{{ .Values.configuration.recovery.partsCount }}" @@ -206,8 +210,10 @@ spec: {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} +{{- if not .Values.configuration.recovery.persistence.hostPath.enabled }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} +{{- end }} volumeMounts: {{- if .Values.api.enabled }} - name: api-cache @@ -276,7 +282,13 @@ spec: {{- end }} {{- if .Values.configuration.recovery.enabled }} - name: recovery +{{- if .Values.configuration.recovery.persistence.hostPath.enabled }} + hostPath: + path: {{ .Values.configuration.recovery.persistence.hostPath.path | quote }} + type: DirectoryOrCreate +{{- else }} persistentVolumeClaim: claimName: {{ .Release.Name }}-recovery {{- end }} {{- end }} +{{- end }} diff --git a/charts/bytewax/values.yaml b/charts/bytewax/values.yaml index d55ed9f..5784337 100644 --- a/charts/bytewax/values.yaml +++ b/charts/bytewax/values.yaml @@ -3,10 +3,10 @@ # Declare variables to be passed into your templates. image: - repository: bytewax.docker.scarf.sh/bytewax/bytewax + repository: bytewax/bytewax pullPolicy: Always # Overrides the image tag whose default is the chart appVersion. - tag: "0.18.1-python3.9" + tag: "0.19.0-python3.9" imagePullSecrets: [] @@ -136,6 +136,9 @@ configuration: ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ ## persistence: + hostPath: + enabled: false + path: "" # storageClassName: default accessModes: - ReadWriteOnce From bf79af63548cda04f328bd73dcde864a2811ee36 Mon Sep 17 00:00:00 2001 From: Esteban Miccio Date: Fri, 22 Mar 2024 15:59:16 -0300 Subject: [PATCH 2/3] Changing app version to match bytewax version --- charts/bytewax/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/bytewax/Chart.yaml b/charts/bytewax/Chart.yaml index 34df56a..a8e7612 100644 --- a/charts/bytewax/Chart.yaml +++ b/charts/bytewax/Chart.yaml @@ -3,7 +3,7 @@ name: bytewax description: A Helm chart for Bytewax type: application version: 0.8.1 -appVersion: "0.18.2" +appVersion: "0.19.0" icon: https://bytewax.io/assets/img/favicon.png dependencies: - condition: opentelemetry-collector.enabled From 47710eb20c3e1bd096227b237ff2e7c4b59a8444 Mon Sep 17 00:00:00 2001 From: Esteban Miccio Date: Fri, 22 Mar 2024 16:06:42 -0300 Subject: [PATCH 3/3] Adding helm option to k8s_cluster.py example --- charts/bytewax/examples/k8s_cluster.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/charts/bytewax/examples/k8s_cluster.py b/charts/bytewax/examples/k8s_cluster.py index fbf8234..0ebb085 100644 --- a/charts/bytewax/examples/k8s_cluster.py +++ b/charts/bytewax/examples/k8s_cluster.py @@ -39,8 +39,16 @@ def to_tuple(x): # flow.output("out", DirOutput(output_dir, 5, assign_file=int)) -# We are going to use Waxctl, you can download it from https://bytewax.io/downloads -# Run these commands in your terminal to run a cluster of two containers: +# To run this example using helm you need to run the following +# helm upgrade --install k8s-cluster \ +# bytewax/bytewax \ +# --set configuration.pythonFileName=k8s_cluster.py \ +# --set configuration.processesCount=2 \ +# --set configuration.configMap.files.tarName=examples.tar \ +# --set configuration.keepAlive=true + +# Also, you could use Waxctl to run the example. For that you can download it from https://bytewax.io/downloads +# And then, run these commands in your terminal to run a cluster of two containers: # $ tar -C ./ -cvf cluster.tar examples # $ waxctl dataflow deploy ./cluster.tar \ @@ -48,6 +56,8 @@ def to_tuple(x): # --python-file-name examples/k8s_cluster.py \ # -p2 --debug --keep-alive=true --yes +# Regardless of how you have executed the example (helm CLI or waxctl): + # Each worker will read the files in # ./examples/sample_data/cluster/*.txt which have lines like # `ONE1`.