From 1bfaa23fbf77676e171e9a78504c32d0144418bd Mon Sep 17 00:00:00 2001 From: Rafid K Date: Fri, 19 Apr 2024 17:00:48 +0000 Subject: [PATCH] Addressing feedback on PR #41 + other minor changes * Checked for major version in verify_python_version * More documentation in `generate_base_dockerfile` * Bumped version to 2.9.0 * Support passing SSL mode for Postgres connection. * Downgraded to Python 3.11.9 since we don't want to go to Python 3.12 before sufficient adoption. * Remove version pinning for Amazon providers since this is covered by the Airflow constraints file. * Update the `requirements.txt` used for development. Removed all but the requirements we want, and left the rest for pip to install automatically. This makes updating the file easier. * `db_lock` method: renamed `timeout` to `timeout_ms` for clarity. * Check for both `pip install` and `pip3 install` in `pip_install_check.py`. --- .github/workflows/quality-checks.yaml | 12 +- create_venvs.py | 7 +- images/airflow/2.8.0/Dockerfile.base.j2 | 11 +- .../airflow/2.8.0/Dockerfile.derivatives.j2 | 2 +- images/airflow/2.8.0/Dockerfiles/Dockerfile | 4 +- .../airflow/2.8.0/Dockerfiles/Dockerfile-dev | 4 +- .../2.8.0/Dockerfiles/Dockerfile-explorer | 4 +- .../2.8.0/Dockerfiles/Dockerfile-explorer-dev | 4 +- .../Dockerfile-explorer-privileged | 4 +- .../Dockerfile-explorer-privileged-dev | 4 +- .../airflow/2.8.0/Dockerfiles/Dockerfile.base | 13 +- .../2.8.0/bin/airflow-user/safe-pip-install | 2 +- images/airflow/2.8.0/build.sh | 4 +- images/airflow/2.8.0/docker-compose.yaml | 3 +- images/airflow/2.8.0/explore-image.sh | 2 +- images/airflow/2.8.0/install_pip_packages.sh | 5 +- .../2.8.0/python/mwaa/config/airflow.py | 2 +- .../2.8.0/python/mwaa/config/database.py | 7 +- .../airflow/2.8.0/python/mwaa/config/sqs.py | 4 +- .../airflow/2.8.0/python/mwaa/entrypoint.py | 12 +- images/airflow/2.8.0/requirements.txt | 214 ++---------------- images/airflow/2.8.0/run.sh.template | 2 +- images/airflow/generate-dockerfiles.py 
| 12 +- quality-checks/pip_install_check.py | 2 +- 24 files changed, 89 insertions(+), 251 deletions(-) diff --git a/.github/workflows/quality-checks.yaml b/.github/workflows/quality-checks.yaml index 35b721c..9e00815 100644 --- a/.github/workflows/quality-checks.yaml +++ b/.github/workflows/quality-checks.yaml @@ -17,8 +17,8 @@ jobs: # gcc, libcurl-devel: For compiling pycurl (required for our Airflow setup.) # gzip: Requiring by actions/checkout@v2 to gunzip the source code. # postgresql-devel: Required for our Airflow setup. - # python3-devel: Required for building some Python modules, e.g. pycurl. - # python3: Self explanatory. + # python3.11-devel: Required for building some Python modules, e.g. pycurl. + # python3.11: Self explanatory. # tar, wget, xz: For downloading and extracting ShellCheck dnf update -y dnf install -y \ @@ -26,8 +26,8 @@ jobs: gzip \ libcurl-devel \ postgresql-devel \ - python3 \ - python3-devel \ + python3.11 \ + python3.11-devel \ tar \ wget \ xz @@ -40,7 +40,7 @@ jobs: uses: actions/checkout@v2 - name: Create the necessary Python virtual environments... - run: python3 ./create_venvs.py + run: python3.11 ./create_venvs.py - name: Run quality checks... 
- run: python3 ./quality-checks/run_all.py + run: python3.11 ./quality-checks/run_all.py diff --git a/create_venvs.py b/create_venvs.py index f128151..07d8008 100644 --- a/create_venvs.py +++ b/create_venvs.py @@ -21,9 +21,10 @@ def verify_python_version(): """Check if the current Python version is at least 3.9.""" - _major, minor, *_ = sys.version_info - if minor < 9: - print("Python 3.9 or higher is required.") + major, minor, *_ = sys.version_info + + if major != 3 or minor < 11: + print("Python 3.11 or higher is required.") sys.exit(1) diff --git a/images/airflow/2.8.0/Dockerfile.base.j2 b/images/airflow/2.8.0/Dockerfile.base.j2 index 0cbd167..f053fe9 100644 --- a/images/airflow/2.8.0/Dockerfile.base.j2 +++ b/images/airflow/2.8.0/Dockerfile.base.j2 @@ -2,23 +2,20 @@ FROM public.ecr.aws/amazonlinux/amazonlinux:2023 # Environment variables -# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image -# within Amazon MWAA since 2.7.2 is a version we support. -ENV AIRFLOW_VERSION=2.7.2 -ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1 +ENV AIRFLOW_VERSION=2.9.0 -ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt" +ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt" ENV AIRFLOW_USER_HOME=/usr/local/airflow ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME} ENV MWAA_HOME=/usr/local/mwaa -ENV PYTHON_VERSION=3.11.7 +ENV PYTHON_VERSION=3.11.9 # We don't want those variables to stay in the final image, so we use ARG instead of ENV. 
ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin ARG PATH_DEFAULT=${PATH} ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT} -ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c +ARG PYTHON_MD5_CHECKSUM=22ea467e7d915477152e99d5da856ddc ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm ARG MARIADB_RPM_COMMON_CHECKSUM=e87371d558efa97724f3728fb214cf19 diff --git a/images/airflow/2.8.0/Dockerfile.derivatives.j2 b/images/airflow/2.8.0/Dockerfile.derivatives.j2 index b499484..5999882 100644 --- a/images/airflow/2.8.0/Dockerfile.derivatives.j2 +++ b/images/airflow/2.8.0/Dockerfile.derivatives.j2 @@ -1,4 +1,4 @@ -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base {% if bootstrapping_scripts_dev %} diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile b/images/airflow/2.8.0/Dockerfiles/Dockerfile index ee70bfa..ac1909d 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # -# This file was generated on 2024-02-22 19:42:44.935774 +# This file was generated on 2024-04-19 02:30:48.359587 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base USER airflow diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile-dev b/images/airflow/2.8.0/Dockerfiles/Dockerfile-dev index 0ae295d..09a91d6 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile-dev +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile-dev @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. 
# -# This file was generated on 2024-02-22 19:42:44.927521 +# This file was generated on 2024-04-19 02:30:48.351336 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base # Copy bootstrapping files. COPY ./bootstrap-dev /bootstrap-dev diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer index df2a928..3e65a89 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # -# This file was generated on 2024-02-22 19:42:44.938417 +# This file was generated on 2024-04-19 02:30:48.362267 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base USER airflow diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev index a229408..31f0c48 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # -# This file was generated on 2024-02-22 19:42:44.930305 +# This file was generated on 2024-04-19 02:30:48.354110 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base # Copy bootstrapping files. COPY ./bootstrap-dev /bootstrap-dev diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged index dfb4196..bd75226 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. 
# -# This file was generated on 2024-02-22 19:42:44.941098 +# This file was generated on 2024-04-19 02:30:48.364902 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base USER root diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged-dev b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged-dev index c4676f4..2fadd2d 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged-dev +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-privileged-dev @@ -3,10 +3,10 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # -# This file was generated on 2024-02-22 19:42:44.933100 +# This file was generated on 2024-04-19 02:30:48.356865 # -FROM amazon-mwaa/airflow:2.8.0-base +FROM amazon-mwaa/airflow:2.9.0-base # Copy bootstrapping files. COPY ./bootstrap-dev /bootstrap-dev diff --git a/images/airflow/2.8.0/Dockerfiles/Dockerfile.base b/images/airflow/2.8.0/Dockerfiles/Dockerfile.base index 095bd05..6bbe400 100644 --- a/images/airflow/2.8.0/Dockerfiles/Dockerfile.base +++ b/images/airflow/2.8.0/Dockerfiles/Dockerfile.base @@ -3,30 +3,27 @@ # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # -# This file was generated on 2024-02-22 19:42:44.924226 +# This file was generated on 2024-04-19 02:30:48.348008 # FROM public.ecr.aws/amazonlinux/amazonlinux:2023 # Environment variables -# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image -# within Amazon MWAA since 2.7.2 is a version we support. 
-ENV AIRFLOW_VERSION=2.7.2 -ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1 +ENV AIRFLOW_VERSION=2.9.0 -ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt" +ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt" ENV AIRFLOW_USER_HOME=/usr/local/airflow ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME} ENV MWAA_HOME=/usr/local/mwaa -ENV PYTHON_VERSION=3.11.7 +ENV PYTHON_VERSION=3.11.9 # We don't want those variables to stay in the final image, so we use ARG instead of ENV. ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin ARG PATH_DEFAULT=${PATH} ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT} -ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c +ARG PYTHON_MD5_CHECKSUM=22ea467e7d915477152e99d5da856ddc ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm ARG MARIADB_RPM_COMMON_CHECKSUM=e87371d558efa97724f3728fb214cf19 diff --git a/images/airflow/2.8.0/bin/airflow-user/safe-pip-install b/images/airflow/2.8.0/bin/airflow-user/safe-pip-install index 7fb324c..8adf703 100644 --- a/images/airflow/2.8.0/bin/airflow-user/safe-pip-install +++ b/images/airflow/2.8.0/bin/airflow-user/safe-pip-install @@ -2,7 +2,7 @@ # Define an array of required packages REQUIRED_PACKAGES=( - "apache-airflow-providers-amazon[aiobotocore]==${AIRFLOW_AMAZON_PROVIDERS_VERSION}" + "apache-airflow-providers-amazon[aiobotocore]" "apache-airflow[celery,statsd]==${AIRFLOW_VERSION}" "celery[sqs]" psycopg2 diff --git a/images/airflow/2.8.0/build.sh b/images/airflow/2.8.0/build.sh index 3e7a8cb..8625377 100755 --- a/images/airflow/2.8.0/build.sh +++ b/images/airflow/2.8.0/build.sh @@ -8,13 +8,13 @@ python3 ../generate-dockerfiles.py deactivate # Build the base image. 
-docker build -f ./Dockerfiles/Dockerfile.base -t amazon-mwaa/airflow:2.8.0-base ./ +docker build -f ./Dockerfiles/Dockerfile.base -t amazon-mwaa/airflow:2.9.0-base ./ # Build the derivatives. for dev in "True" "False"; do for build_type in "standard" "explorer" "explorer-privileged"; do dockerfile_name="Dockerfile" - tag_name="amazon-mwaa/airflow:2.8.0" + tag_name="amazon-mwaa/airflow:2.9.0" if [[ "$build_type" != "standard" ]]; then dockerfile_name="${dockerfile_name}-${build_type}" diff --git a/images/airflow/2.8.0/docker-compose.yaml b/images/airflow/2.8.0/docker-compose.yaml index 4f0b912..d411111 100644 --- a/images/airflow/2.8.0/docker-compose.yaml +++ b/images/airflow/2.8.0/docker-compose.yaml @@ -1,7 +1,7 @@ version: "3.8" x-airflow-common: &airflow-common - image: amazon-mwaa/airflow:2.8.0 + image: amazon-mwaa/airflow:2.9.0 restart: always environment: # AWS credentials @@ -20,6 +20,7 @@ x-airflow-common: &airflow-common MWAA__DB__POSTGRES_USER: "airflow" MWAA__DB__POSTGRES_PASSWORD: "airflow" MWAA__DB__POSTGRES_DB: "airflow" + MWAA__DB__POSTGRES_SSLMODE: "prefer" # SQS configuration MWAA__SQS__CREATE_QUEUE: True diff --git a/images/airflow/2.8.0/explore-image.sh b/images/airflow/2.8.0/explore-image.sh index 6e316d3..5792fd3 100755 --- a/images/airflow/2.8.0/explore-image.sh +++ b/images/airflow/2.8.0/explore-image.sh @@ -1,2 +1,2 @@ #!/bin/bash -docker container run -it amazon-mwaa/airflow:2.8.0-explorer-dev +docker container run -it amazon-mwaa/airflow:2.9.0-explorer-dev diff --git a/images/airflow/2.8.0/install_pip_packages.sh b/images/airflow/2.8.0/install_pip_packages.sh index ec8dd18..2a28375 100755 --- a/images/airflow/2.8.0/install_pip_packages.sh +++ b/images/airflow/2.8.0/install_pip_packages.sh @@ -1,7 +1,6 @@ #!/bin/bash -AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0 -AIRFLOW_VERSION=2.8.0 +AIRFLOW_VERSION=2.9.1 PYTHON_MAJOR_MINOR_VERSION=3.11 
CONSTRAINT_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt" @@ -12,5 +11,5 @@ pip3 install --constraint "${CONSTRAINT_FILE}" \ psycopg2 \ "celery[sqs]" \ "apache-airflow[celery,statsd]==${AIRFLOW_VERSION}" \ - "apache-airflow-providers-amazon[aiobotocore]==${AIRFLOW_AMAZON_PROVIDERS_VERSION}" \ + "apache-airflow-providers-amazon[aiobotocore]" \ watchtower \ No newline at end of file diff --git a/images/airflow/2.8.0/python/mwaa/config/airflow.py b/images/airflow/2.8.0/python/mwaa/config/airflow.py index 3962ebd..2ec0318 100644 --- a/images/airflow/2.8.0/python/mwaa/config/airflow.py +++ b/images/airflow/2.8.0/python/mwaa/config/airflow.py @@ -34,8 +34,8 @@ def get_airflow_celery_config() -> Dict[str, str]: "AIRFLOW__CELERY__CELERY_CONFIG_OPTIONS": celery_config_module_path, "AIRFLOW__CELERY__RESULT_BACKEND": f"db+{get_db_connection_string()}", "AIRFLOW__CELERY__WORKER_ENABLE_REMOTE_CONTROL": "False", - "AIRFLOW__CORE__EXECUTOR": "CeleryExecutor", # Not a Celery config per-se, but is used by the Celery executor. 
+ "AIRFLOW__CORE__EXECUTOR": "CeleryExecutor", "AIRFLOW__OPERATORS__DEFAULT_QUEUE": get_sqs_queue_name(), } diff --git a/images/airflow/2.8.0/python/mwaa/config/database.py b/images/airflow/2.8.0/python/mwaa/config/database.py index adad460..76bdb27 100644 --- a/images/airflow/2.8.0/python/mwaa/config/database.py +++ b/images/airflow/2.8.0/python/mwaa/config/database.py @@ -67,12 +67,14 @@ def get_db_connection_string() -> str: "MWAA__DB__POSTGRES_HOST", "MWAA__DB__POSTGRES_PORT", "MWAA__DB__POSTGRES_DB", + "MWAA__DB__POSTGRES_SSLMODE", ] try: ( postgres_host, postgres_port, postgres_db, + postgres_sslmode, ) = itemgetter(*env_vars_names)(os.environ) (postgres_user, postgres_password) = get_db_credentials() except Exception as e: @@ -84,8 +86,11 @@ def get_db_connection_string() -> str: f"following exception: {e}" ) + if not postgres_sslmode: + postgres_sslmode = 'require' + protocol = "postgresql+psycopg2" creds = f"{postgres_user}:{postgres_password}" addr = f"{postgres_host}:{postgres_port}" # TODO We need to do what is the necessary to enforce 'require'. - return f"{protocol}://{creds}@{addr}/{postgres_db}?sslmode=prefer" + return f"{protocol}://{creds}@{addr}/{postgres_db}?sslmode={postgres_sslmode}" diff --git a/images/airflow/2.8.0/python/mwaa/config/sqs.py b/images/airflow/2.8.0/python/mwaa/config/sqs.py index f022745..b9be186 100644 --- a/images/airflow/2.8.0/python/mwaa/config/sqs.py +++ b/images/airflow/2.8.0/python/mwaa/config/sqs.py @@ -79,7 +79,9 @@ def _get_queue_name_from_url(queue_url: str) -> str: :returns The name of the queue or None if the URL is invalid. """ try: - # Validate the protocol. + # Validate the protocol (to flag accidentally passing of sqs:// + # protocol which is just a Celery convention, rather than an + # actual protocol.) 
if not queue_url.startswith("http://") and not queue_url.startswith("https://"): raise ValueError( f"URL {queue_url} is should start with http:// or https://" diff --git a/images/airflow/2.8.0/python/mwaa/entrypoint.py b/images/airflow/2.8.0/python/mwaa/entrypoint.py index 922d45f..3c3824c 100644 --- a/images/airflow/2.8.0/python/mwaa/entrypoint.py +++ b/images/airflow/2.8.0/python/mwaa/entrypoint.py @@ -53,7 +53,7 @@ def abort(err_msg: str, exit_code: int = 1): F = TypeVar("F", bound=Callable[..., Any]) -def db_lock(lock_id: int, timeout: int = 300 * 1000) -> Callable[[F], F]: +def db_lock(lock_id: int, timeout_ms: int = 300 * 1000) -> Callable[[F], F]: """ Generate a decorator that can be used to protect a function by a database lock. @@ -66,8 +66,8 @@ def db_lock(lock_id: int, timeout: int = 300 * 1000) -> Callable[[F], F]: same lock ID, only one process will be granted the lock at one time. However, if the processes have different lock IDs, they will be granted the locks at the same time. - :param timeout: The maximum time the process is allowed to hold the lock. After this - time expires, the lock is automatically released. + :param timeout_ms: The maximum time, in milliseconds, the process is allowed to hold + the lock. After this time expires, the lock is automatically released. :returns A decorator that can be applied to a function to protect it with a DB lock. 
""" @@ -75,13 +75,13 @@ def decorator(func: F) -> F: def wrapper(*args: Any, **kwargs: Any) -> Any: func_name: str = func.__name__ db_engine: Engine = create_engine( - get_db_connection_string() # Assuming this is defined elsewhere + get_db_connection_string() ) print(f"Obtaining lock for {func_name}...") with db_engine.connect() as conn: # type: ignore try: conn.execute( # type: ignore - text("SET LOCK_TIMEOUT to :timeout"), {"timeout": timeout} + text("SET LOCK_TIMEOUT to :timeout"), {"timeout": timeout_ms} ) conn.execute( # type: ignore text("SELECT pg_advisory_lock(:id)"), {"id": lock_id} @@ -93,7 +93,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: except Exception as e: abort( f"Failed while executing {func_name}. " + f"Error: {e}." - ) # Assuming abort is defined elsewhere + ) except Exception as e: abort( f"Failed to obtain DB lock for {func_name}. " + f"Error: {e}." diff --git a/images/airflow/2.8.0/requirements.txt b/images/airflow/2.8.0/requirements.txt index e4d7fea..72fffce 100644 --- a/images/airflow/2.8.0/requirements.txt +++ b/images/airflow/2.8.0/requirements.txt @@ -3,197 +3,23 @@ # the requirements of Airflow within the image. Still, they are largely similar # apart from some additional requirements for type checking, e.g. boto3-stubs # or similar stuff for aiding build and development, e.g. pydocstyle. 
---constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt -aiobotocore==2.6.0 -aiohttp==3.8.6 -aioitertools==0.11.0 -aiosignal==1.3.1 -alembic==1.12.0 -amqp==5.1.1 -annotated-types==0.6.0 -anyio==4.0.0 -apache-airflow==2.7.2 -apache-airflow-providers-amazon==8.7.1 -apache-airflow-providers-celery==3.3.4 -apache-airflow-providers-common-sql==1.7.2 -apache-airflow-providers-ftp==3.5.2 -apache-airflow-providers-http==4.5.2 -apache-airflow-providers-imap==3.3.2 -apache-airflow-providers-sqlite==3.4.3 -apispec==6.3.0 -argcomplete==3.1.2 -asgiref==3.7.2 -asn1crypto==1.5.1 -async-timeout==4.0.3 -attrs==23.1.0 -Babel==2.13.0 -backoff==1.10.0 -beautifulsoup4==4.12.2 -billiard==4.1.0 -blinker==1.6.3 -boto3==1.28.17 -boto3-stubs==1.28.85 -botocore==1.31.17 -botocore-stubs==1.34.41 -cachelib==0.9.0 -cattrs==23.1.2 -celery==5.3.4 -certifi==2023.7.22 -cffi==1.16.0 -charset-normalizer==3.3.0 -click==8.1.7 -click-didyoumean==0.3.0 -click-plugins==1.1.1 -click-repl==0.3.0 -clickclick==20.10.2 -colorama==0.4.6 -colorlog==4.8.0 -ConfigUpdater==3.1.1 -connexion==2.14.2 -cron-descriptor==1.4.0 -croniter==1.4.1 -cryptography==41.0.4 -Deprecated==1.2.14 -dill==0.3.1.1 -dnspython==2.4.2 -docutils==0.20.1 -email-validator==1.3.1 -Flask==2.2.5 -Flask-AppBuilder==4.3.6 -Flask-Babel==2.0.0 -Flask-Caching==2.0.2 -Flask-JWT-Extended==4.5.3 -Flask-Limiter==3.5.0 -Flask-Login==0.6.2 -Flask-Session==0.5.0 -Flask-SQLAlchemy==2.5.1 -Flask-WTF==1.2.1 -flower==2.0.1 -frozenlist==1.4.0 -fsspec==2023.12.2 -google-re2==1.1 -googleapis-common-protos==1.60.0 -graphviz==0.20.1 -greenlet==3.0.0 -grpcio==1.59.0 -gunicorn==21.2.0 -h11==0.14.0 -httpcore==0.16.3 -httpx==0.23.3 -humanize==4.8.0 -idna==3.4 -importlib-metadata==6.8.0 -importlib-resources==6.1.0 -inflection==0.5.1 -itsdangerous==2.1.2 -Jinja2==3.1.2 -jmespath==0.10.0 -jsonpath-ng==1.6.0 -jsonschema==4.19.1 -jsonschema-specifications==2023.7.1 -kombu==5.3.2 -lazy-object-proxy==1.9.0 -limits==3.6.0 
-linkify-it-py==2.0.2 -lockfile==0.12.2 -lxml==4.9.3 -Mako==1.2.4 -Markdown==3.5 -markdown-it-py==3.0.0 -MarkupSafe==2.1.3 -marshmallow==3.20.1 -marshmallow-oneofschema==3.0.1 -marshmallow-sqlalchemy==0.26.1 -mdit-py-plugins==0.4.0 -mdurl==0.1.2 -multidict==6.0.4 -mypy==1.2.0 -mypy-boto3-cloudformation==1.28.83 -mypy-boto3-dynamodb==1.28.73 -mypy-boto3-ec2==1.28.85 -mypy-boto3-lambda==1.28.83 -mypy-boto3-rds==1.28.61 -mypy-boto3-s3==1.28.55 -mypy-boto3-sqs==1.28.82 -mypy-extensions==1.0.0 -nodeenv==1.8.0 -opentelemetry-api==1.20.0 -opentelemetry-exporter-otlp==1.20.0 -opentelemetry-exporter-otlp-proto-common==1.20.0 -opentelemetry-exporter-otlp-proto-grpc==1.20.0 -opentelemetry-exporter-otlp-proto-http==1.20.0 -opentelemetry-proto==1.20.0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -ordered-set==4.1.0 -packaging==23.2 -pathspec==0.11.2 -pendulum==2.1.2 -pluggy==1.3.0 -ply==3.11 -prison==0.2.1 -prometheus-client==0.17.1 -prompt-toolkit==3.0.39 -protobuf==4.21.12 -psutil==5.9.5 -psycopg2==2.9.9 -pycparser==2.21 -pycurl==7.45.3 -pydantic==2.4.2 -pydantic_core==2.10.1 -pydocstyle==6.3.0 -Pygments==2.16.1 -PyJWT==2.8.0 -pyright==1.1.351 -python-daemon==3.0.1 -python-dateutil==2.8.2 -python-nvd3==0.15.0 -python-slugify==8.0.1 -pytz==2023.3.post1 -pytzdata==2020.1 -PyYAML==6.0.1 -redshift-connector==2.0.914 -referencing==0.30.2 -requests==2.31.0 -requests-toolbelt==1.0.0 -rfc3339-validator==0.1.4 -rfc3986==1.5.0 -rich==13.6.0 -rich-argparse==1.3.0 -rpds-py==0.10.4 -ruff==0.0.292 -s3transfer==0.6.2 -scramp==1.4.4 -setproctitle==1.3.3 -six==1.16.0 -sniffio==1.3.0 -snowballstemmer==2.2.0 -soupsieve==2.5 -SQLAlchemy==1.4.49 -SQLAlchemy-JSONField==1.0.1.post0 -sqlalchemy-redshift==0.8.14 -sqlalchemy-stubs==0.4 -SQLAlchemy-Utils==0.41.1 -sqlparse==0.4.4 -statsd==4.0.1 -tabulate==0.9.0 -tenacity==8.2.3 -termcolor==2.3.0 -text-unidecode==1.3 -tornado==6.3.3 -types-awscrt==0.20.3 -types-s3transfer==0.10.0 -typing_extensions==4.8.0 -tzdata==2023.3 
-uc-micro-py==1.0.2 -unicodecsv==0.14.1 -universal-pathlib==0.1.4 -urllib3==1.26.17 -vine==5.0.0 -watchtower==2.0.1 -wcwidth==0.2.8 -Werkzeug==2.2.3 -wrapt==1.15.0 -WTForms==3.0.1 -yarl==1.9.2 -zipp==3.17.0 +--constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt +# Main Airflow packages. +# NOTE: We always specify the version here. +apache-airflow[celery,statsd]==2.9.0 +# Additional packages for MWAA Airflow. +# NOTE: We don't specify the version here since the constraints file take care +# of this, making it easier to port the code to other versions. +apache-airflow-providers-amazon[aiobotocore] +celery[sqs] +psycopg2 +pycurl +watchtower +# Additional packages for development +# NOTE: Like above, we don't specify the version here. +boto3-stubs +botocore-stubs +pydocstyle +pyright +ruff +sqlalchemy-stubs diff --git a/images/airflow/2.8.0/run.sh.template b/images/airflow/2.8.0/run.sh.template index 6126b24..d963756 100644 --- a/images/airflow/2.8.0/run.sh.template +++ b/images/airflow/2.8.0/run.sh.template @@ -15,4 +15,4 @@ export MWAA__SQS__QUEUE_URL="The URL of the SQS key to use with Celery>" # Build the Docker image ./build.sh -docker compose up \ No newline at end of file +docker compose up diff --git a/images/airflow/generate-dockerfiles.py b/images/airflow/generate-dockerfiles.py index 5dae6e2..da370ca 100644 --- a/images/airflow/generate-dockerfiles.py +++ b/images/airflow/generate-dockerfiles.py @@ -101,7 +101,17 @@ def generate_dockerfile( def generate_base_dockerfile(image_root_dir: Path) -> None: """Generate the Dockerfile.base file based on the Dockerfile.base.j2 - template.""" + template. + + We generate multiple Docker images for different purposes, as explained below under + the documentation of `generate_derivative_dockerfiles`. However, these derivative + images actually share most of the setup. 
So, to reduce build time and avoid + duplication, we generate a "base" Docker image, and then derive the rest of the + images from them. + + :param image_root_dir: The root directory of the Docker image, i.e. where the + `Dockerfile` resides. + """ # Template data data = { "bootstrapping_scripts_root_firstpass": sorted( diff --git a/quality-checks/pip_install_check.py b/quality-checks/pip_install_check.py index 0a8c213..694398f 100755 --- a/quality-checks/pip_install_check.py +++ b/quality-checks/pip_install_check.py @@ -24,7 +24,7 @@ def check_file_for_pip_install(filepath: str) -> bool: """ with open(filepath, "r") as file: for line in file: - if "pip install" in line: + if "pip install" in line or "pip3 install" in line: return False return True