diff --git a/.circleci/config.yml b/.circleci/config.yml index 1aa7bfa1..7c902217 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,7 +44,7 @@ jobs: name: Run golang linter command: | hack/go-generate.sh - golangci-lint run --disable golint,funlen,godox,whitespace,stylecheck --build-tags fuse_cli --max-same-issues 0 --verbose + golangci-lint run --verbose - run: name: Lint shell scripts command: | diff --git a/.golangci.yml b/.golangci.yml index 59bb8d1b..5f0a8dee 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,3 +1,6 @@ +run: + build-tags: fuse_cli + linters-settings: govet: check-shadowing: true @@ -23,3 +26,11 @@ linters: - lll - gochecknoinits - gochecknoglobals + - golint + - godox + - whitespace + - funlen + - stylecheck + - unparam + +max-same-issues: 0 diff --git a/Makefile b/Makefile index 872520e1..c7dcb897 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,9 @@ help: } \ { lastLine = $$0 }' $(MAKEFILE_LIST) -.PHONY: build-and-push-fuse-sidecar -## build sidecar container used in Argo workflows -build-and-push-fuse-sidecar: build-datamon-binaries +.PHONY: build-and-push-fuse-sidecar-img +## build FUSE sidecar container used in Argo workflows +build-and-push-fuse-sidecar-img: @echo 'building fuse sidecar container' docker build \ --progress plain \ @@ -64,6 +64,27 @@ build-and-push-fuse-sidecar: build-datamon-binaries . 
docker push gcr.io/onec-co/datamon-fuse-sidecar +.PHONY: build-and-push-fuse-sidecar +## build FUSE sidecar container used in Argo workflows +build-and-push-fuse-sidecar: build-datamon-binaries build-and-push-fuse-sidecar-img + +.PHONY: build-and-push-pg-sidecar-img +## build postgres sidecar container used in Argo workflows +build-and-push-pg-sidecar-img: + @echo 'building pg sidecar container' + docker build \ + --progress plain \ + -t gcr.io/onec-co/datamon-pg-sidecar \ + -t gcr.io/onec-co/datamon-pg-sidecar:${GITHUB_USER}-$$(date '+%Y%m%d') \ + -t gcr.io/onec-co/datamon-pg-sidecar:$(subst /,_,$(GIT_BRANCH)) \ + --ssh default \ + -f sidecar-pg.Dockerfile \ + . + docker push gcr.io/onec-co/datamon-pg-sidecar + +.PHONY: build-and-push-pg-sidecar +## build postgres sidecar container used in Argo workflows +build-and-push-pg-sidecar: build-datamon-binaries build-and-push-pg-sidecar-img .PHONY: build-and-push-datamover ## build sidecar container used in Argo workflows @@ -259,7 +280,7 @@ fuse-demo-ro: fuse-demo-build-shell fuse-demo-build-sidecar .PHONY: fuse-demo-coord-build-app ## build shell container used in fuse demo fuse-demo-coord-build-app: - @echo 'building fuse demo container' + @echo 'building fuse demo application container' docker build \ --progress plain \ -t gcr.io/onec-co/datamon-fuse-demo-coord-app \ @@ -271,7 +292,7 @@ fuse-demo-coord-build-app: .PHONY: fuse-demo-coord-build-datamon ## build shell container used in fuse demo fuse-demo-coord-build-datamon: - @echo 'building fuse demo container' + @echo 'building fuse demo sidecar container' docker build \ --progress plain \ -t gcr.io/onec-co/datamon-fuse-demo-coord-datamon \ @@ -280,6 +301,18 @@ fuse-demo-coord-build-datamon: . 
docker push gcr.io/onec-co/datamon-fuse-demo-coord-datamon +.PHONY: pg-demo-coord-build-app +## build application container used in pg demo +pg-demo-coord-build-app: + @echo 'building pg demo application container' + docker build \ + --progress plain \ + -t gcr.io/onec-co/datamon-pg-demo-coord-app \ + --ssh default \ + -f ./hack/fuse-demo/coord-app-pg.Dockerfile \ + . + docker push gcr.io/onec-co/datamon-pg-demo-coord-app + .PHONY: profile-metrics ## Build the metrics collection binary and write output profile-metrics: diff --git a/README.md b/README.md index c535e2fd..be483d24 100644 --- a/README.md +++ b/README.md @@ -119,28 +119,71 @@ multiple labels can refer to the same bundle via its commit hash. Current use of Datamon at One Concern with respect to intra-Argo workflow usage relies on the [kubernetes sidecar](https://kubernetes.io/docs/tasks/access-application-cluster/communicate-containers-same-pod-shared-volume/) -pattern where a shared volume is used as the transport layer for application layer +pattern wherein a shared volume (transport layer) carries application layer communication to coordinate between the _main container_, where a data-science program accesses data provided by Datamon and produces data for Datamon to upload, and the -_sidecar container_, where Datamon provides data for access (via streaming through -main memory directly from GCS) and then, after the main container is done outputting -data to a shared Kubernetes volume, uploads the results of the data-science program -to GCS.
Ensuring that, for example, the streaming data is ready for access (sidecar to -main-container messaging) as well as notification that the data-science program has -produced output data to upload (main-container to sidecar messaging), is the responsibility -of a couple of shell scripts that both ship inside the `gcr.io/onec-co/datamon-fuse-sidecar` -container, which is versioned along with +_sidecar container_, where Datamon provides data for access (as hierarchical filesystems, +as SQL databases, etc.). +After the main container's DAG-node-specific data-science program outputs data +(to a shared Kubernetes volume, to a PostgreSQL instance in the sidecar, and so on), +the sidecar container uploads the results of the data-science program to GCS. + +Ensuring that data is ready for access (sidecar to main-container messaging) +as well as notifying the sidecar that the data-science program has +produced output data to upload (main-container to sidecar messaging), +is the responsibility of a few shell scripts shipped as part and parcel of the +Docker images that practicably constitute sidecars. +While there's precisely one application container per Argo node, +a Kubernetes container created from an arbitrary image, +sidecars are additional containers in the same Kubernetes pod +-- or Argo DAG node, we can say, approximately synonymously -- +that coordinate datamon-based data-ferrying setups with the application container. + +_Aside_: as additional kinds of data sources and sinks are added, +we may also refer to "sidecars" as "batteries," and so on as semantic drift +of the shell scripts shears away feature creep in the application binary.
+ +There are currently two batteries-included® images + +* `gcr.io/onec-co/datamon-fuse-sidecar` + provides hierarchical filesystem access +* `gcr.io/onec-co/datamon-pg-sidecar` + provides PostgreSQL database access + +Both are versioned along with [github releases](https://github.com/oneconcern/datamon/releases/) -of the desktop binary: to access release `0.4` as listed on the github releases page, -use the tag `v0.4` as in `gcr.io/onec-co/datamon-fuse-sidecar:v0.4` when -writing Dockerfiles or Kubernetes-like YAML that accesses the sidecar container image. +of the +[desktop binary](#os-x-install-guide). +To access recent releases listed on the github releases page, +use the git tag as the Docker image tag. +At the time of writing, +[v0.7](https://github.com/oneconcern/datamon/releases/tag/v0.7) +is the latest release tag, and (with some elisions) +```yaml +spec: + ... + containers: + - name: datamon-sidecar + image: gcr.io/onec-co/datamon-fuse-sidecar:v0.7 + ... +``` +would be the corresponding Kubernetes YAML to access the sidecar container image. + +_Aside_: historically, and in case it's necessary to roll back to a now-ancient +version of the sidecar image, releases were tagged in git without the `v` prefix, +and Docker tags prepended `v` to the git tag. +For instance, `0.4` is listed on the github releases page, while +the tag `v0.4` as in `gcr.io/onec-co/datamon-fuse-sidecar:v0.4` was used when writing +Dockerfiles or Kubernetes-like YAML to access the sidecar container image. Users need only place the `wrap_application.sh` script located in the root directory -of the sidecar container within the main container. This can be accomplished via -an `initContainer` without duplicating version of the Datamon sidecar image in -both the main application Dockerfile as well as the YAML.
When using a block-storage GCS -product, we might've specified a data-science application's Argo DAG node with something -like +of each of the sidecar containers within the main container. +This +[can be accomplished](https://github.com/oneconcern/datamon/blob/master/hack/k8s/example-coord.template.yaml#L15-L24) +via an `initContainer` without duplicating the version of the Datamon sidecar +image in both the main application Dockerfile as well as the YAML. +When using a block-storage GCS product, we might've specified a data-science application's +Argo DAG node with something like ```yaml command: ["app"] @@ -151,35 +194,49 @@ whereas with `wrap_application.sh` in place, this would be something to the effe ```yaml command: ["/path/to/wrap_application.sh"] -args: ["-c", "/path/to/coordination_directory", "--", "app", "param1", "param2"] +args: ["-c", "/path/to/coordination_directory", "-b", "fuse", "--", "app", "param1", "param2"] ``` That is, `wrap_application.sh` has the following usage ```shell -wrap_application.sh -c -- +wrap_application.sh -c <coordination_directory> -b <sidecar_kind> -- <application_command> ``` -where `` is an empty directory in a shared volume -(an -[`emptyDir`](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) -using memory-backed storage suffices). In the case of Argo workflows in particular, -the empty directory (and not necessarily the volume) ought to be specific to a -particular DAG node (i.e. Kubernetes pod). Each node uses a unique directory. -Meanwhile, `` is the data-science application command exactly as it -would appear without the wrapper script. -That is, the wrapper script, relies the -[conventional UNIX syntax](http://zsh.sourceforge.net/Guide/zshguide02.html#l11) -for stating that options to a command are done being declared. - -Meanwhile, `wrap_datamon.sh` similarly accepts a single `-c` option to specify the -location of the coordination directory. -Additionally, `wrap_datamon.sh` accepts a `-d` option.
The parameters to this option are -among the standard Datamon CLI commands: - -* `config` -* `bundle mount` -* `bundle upload` +where +* `<coordination_directory>` is an empty directory in a shared volume + (an + [`emptyDir`](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) + using memory-backed storage suffices). Each coordination directory (not necessarily the volume) + corresponds to a particular DAG node (i.e. Kubernetes pod) and vice-versa. +* `<sidecar_kind>` is in correspondence with the containers specified in the YAML + and may be among + - `fuse` + - `postgres` +* `<application_command>` is the data-science application command exactly as it + would appear without the wrapper script. That is, the wrapper script relies on the + [conventional UNIX syntax](http://zsh.sourceforge.net/Guide/zshguide02.html#l11) + for stating that options to a command are done being declared. + +Meanwhile, each sidecar's datamon-specific batteries have their corresponding usages. + +##### `gcr.io/onec-co/datamon-fuse-sidecar` -- `wrap_datamon.sh` + +Provides filesystem representations (i.e. a folder) of [datamon bundles](#data-modeling). +Since bundles' filelists are serialized filesystem representations, +the `wrap_datamon.sh` interface is tightly coupled to that of the self-documenting +`datamon` binary itself. + +```shell +./wrap_datamon.sh -c <coordination_directory> -d <datamon_command> -d <datamon_command> ...
+``` + +* `-c` the same coordination directory passed to `wrap_application.sh` +* `-d` all parameters, exactly as passed to the datamon binary, except as a + single scalar (quoted) parameter, for one of the following commands + - `config` sets user information associated with any bundles created by the node + - `bundle mount` provides sources for data-science applications + - `bundle upload` provides sinks for data-science applications Multiple (or none) `bundle mount` and `bundle upload` commands may be specified, and at most one `config` command is allowed so that an example `wrap_datamon.sh` @@ -187,17 +244,87 @@ YAML might be ```yaml command: ["./wrap_datamon.sh"] -args: ["-c", "/tmp/coord", "-d", "config create --name \"Coord\" --email coord-bot@oneconcern.com", "-d", "bundle upload --path /tmp/upload --message \"result of container coordination demo\" --repo ransom-datamon-test-repo --label coordemo", "-d", "bundle mount --repo ransom-datamon-test-repo --label testlabel --destination /tmp --mount /tmp/mount --stream"] +args: ["-c", "/tmp/coord", "-d", "config create --name \"Coord\" --email coord-bot@oneconcern.com", "-d", "bundle upload --path /tmp/upload --message \"result of container coordination demo\" --repo ransom-datamon-test-repo --label coordemo", "-d", "bundle mount --repo ransom-datamon-test-repo --label testlabel --mount /tmp/mount --stream"] ``` or from the shell ```shell -./wrap_datamon.sh -c /tmp/coord -d 'config create --name "Coord" --email coord-bot@oneconcern.com' -d 'bundle upload --path /tmp/upload --message "result of container coordination demo" --repo ransom-datamon-test-repo --label coordemo' -d 'bundle mount --repo ransom-datamon-test-repo --label testlabel --destination /tmp --mount /tmp/mount --stream' +./wrap_datamon.sh -c /tmp/coord -d 'config create --name "Coord" --email coord-bot@oneconcern.com' -d 'bundle upload --path /tmp/upload --message "result of container coordination demo" --repo ransom-datamon-test-repo --label 
coordemo' -d 'bundle mount --repo ransom-datamon-test-repo --label testlabel --mount /tmp/mount --stream' +``` + +##### `gcr.io/onec-co/datamon-pg-sidecar` -- `wrap_datamon_pg.sh` + +Provides Postgres databases as bundles and vice versa. +Since the datamon binary does not include any Postgres-specific notions, +the UI here is more decoupled than that of `wrap_datamon.sh`. +The UI is specified via environment variables +such that `wrap_datamon_pg.sh` is invoked without parameters. + +The script looks for precisely one `dm_pg_opts` environment variable +specifying global options for the entire script and any number of +`dm_pg_db_` variables, one per database. + +---- + +_Aside on serialization format_ + +Each of these environment variables contains a serialized dictionary +according to the following format + +``` +... ``` -where, in particular, the `-d` (Datamon) options passed to the shell wrapper are -scalars. +where `` and `` are each a single +character, anything other than a `.`, and each `` is of one of +two forms, either `