Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging dev-rafidka into main #41

Merged
merged 12 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"folders": [
{
"name": "amazon-mwaa-docker-images",
"path": "./"
},
{
"name": "airflow-2.8.0",
"path": "./images/airflow/2.8.0"
}
],
"settings": {}
}
18 changes: 0 additions & 18 deletions .github/workflows/bash-lint.yaml

This file was deleted.

23 changes: 0 additions & 23 deletions .github/workflows/python-lint.yaml

This file was deleted.

46 changes: 46 additions & 0 deletions .github/workflows/quality-checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: Quality Checks Workflow

on: [push, pull_request]

jobs:
quality_checks:
name: Quality Checks Job
runs-on: ubuntu-latest

container:
image: public.ecr.aws/amazonlinux/amazonlinux:2023

steps:
- name: Install required packages...
run: |
# Update packages and install required dependencies:
# gcc, libcurl-devel: For compiling pycurl (required for our Airflow setup.)
# gzip: Required by actions/checkout@v2 to gunzip the source code.
# postgresql-devel: Required for our Airflow setup.
# python3.11-devel: Required for building some Python modules, e.g. pycurl.
# python3.11: Self explanatory.
# tar, wget, xz: For downloading and extracting ShellCheck
dnf update -y
dnf install -y \
gcc \
gzip \
libcurl-devel \
postgresql-devel \
python3.11 \
python3.11-devel \
tar \
wget \
xz
# Download and install shellcheck for linting shell scripts
wget https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz
tar -xvf shellcheck-stable.linux.x86_64.tar.xz
cp shellcheck-stable/shellcheck /usr/bin/

- name: Check out code...
uses: actions/checkout@v2

- name: Create the necessary Python virtual environments...
run: python3.11 ./create_venvs.py

- name: Run quality checks...
run: python3.11 ./quality-checks/run_all.py
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
venv
.venv
18 changes: 3 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,8 @@
repos:
- repo: https://github.com/hhatto/autopep8
rev: v2.0.4
hooks:
- id: autopep8
args: ["--in-place", "--aggressive", "--aggressive"]

- repo: local
hooks:
- id: lint-bash
name: Lint Bash Scripts
entry: ./lint_bash.sh
- id: quality-checks
rafidka marked this conversation as resolved.
Show resolved Hide resolved
name: Code Quality Checks
entry: ./quality-checks/run_all.py
language: script
types: [shell]

- id: lint-python
name: Lint Python Scripts
entry: ./lint_python.sh
language: script
types: [python]
2 changes: 2 additions & 0 deletions .pydocstyle
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pydocstyle]
match_dir = ^(?!.venv|images).*$
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Generate Airflow Dockerfiles",
"type": "debugpy",
"request": "launch",
"program": "./images/airflow/generate-dockerfiles.py",
"console": "integratedTerminal"
}
]
}
30 changes: 30 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"files.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./.venv/bin/python"
}
87 changes: 87 additions & 0 deletions create_venvs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Create the virtual environments required to develop with this package.

This module should be executed after cloning the repository to create the following
virtual environments:

- One virtual environment at the root package.
- One per Docker image.

Those environments are used for many tasks, most importantly to allow the IDE to use the
right Python environment for the different folders in this repository. This is necessary
since the Python packages required to develop the different Airflow versions are
different from the packages that we need for the various scripts in this repository.
"""
import os
import subprocess
import sys
import venv
from pathlib import Path


def verify_python_version(minimum=(3, 11)):
    """
    Exit the process unless the running interpreter satisfies ``minimum``.

    The interpreter must have the same major version as ``minimum`` and a minor
    version that is greater than or equal to it. When the check fails, an error
    message is printed and the process exits with status code 1.

    :param minimum: The required ``(major, minor)`` Python version. Defaults to
      ``(3, 11)``.
    """
    # NOTE: The signature is intentionally left without subscripted type hints (e.g.
    # `tuple[int, int]`), since those are evaluated at definition time and would crash
    # on old interpreters before this check gets a chance to report a friendly error.
    required_major, required_minor = minimum
    major, minor, *_ = sys.version_info

    if major != required_major or minor < required_minor:
        print(f"Python {required_major}.{required_minor} or higher is required.")
        sys.exit(1)


def create_venv(path: Path):
    """
    Create a venv in the given directory and install requirements if present.

    The environment lives in a ``.venv`` folder directly under ``path``. An existing
    ``.venv`` is reused rather than recreated; in both cases the dependencies listed
    in ``path / "requirements.txt"`` are then handed to ``pip_install``.

    :param path: The directory to create the venv in.
    """
    venv_path = path / ".venv"

    if not venv_path.exists():
        print(f"Creating virtualenv in directory: {path}")
        venv.create(venv_path, with_pip=True)
    else:
        print(f"Virtualenv already exists in {venv_path}")

    # Dependencies are (re)installed even when the venv already existed, so that
    # re-running the script picks up changes to requirements.txt.
    requirements_path = path / "requirements.txt"
    pip_install(venv_path, requirements_path)


def pip_install(venv_dir: Path, requirements_file: Path):
    """
    Install dependencies from requirements.txt if it exists.

    Runs ``python -m pip install -U -r <requirements_file> pip`` using the
    interpreter found under ``<venv_dir>/bin``. Nothing is done when the
    requirements file does not exist.

    :param venv_dir: The path to the venv directory.
    :param requirements_file: The path to the requirements.txt file.

    :raises subprocess.CalledProcessError: If the pip command exits with a non-zero
      status.
    """
    if os.path.exists(requirements_file):
        print(f"Installing dependencies from {requirements_file}...")
        subprocess.run(
            [
                # Use the venv's own interpreter so packages land inside the venv.
                os.path.join(venv_dir, "bin", "python"),
                "-m",
                "pip",
                "install",
                "-U",
                "-r",
                str(requirements_file),
                "pip",  # Upgrade pip as well.
            ],
            check=True,
        )


def main():
    """
    Run the script: check the Python version, then set up every venv.

    The candidates are the repository root, the ``mockwatch-logs`` image folder, and
    every entry under ``images/airflow``. A candidate is skipped unless it is a
    directory that contains a ``requirements.txt`` file.
    """
    verify_python_version()

    # Main project dir first, followed by each image dir.
    candidates = [Path("."), Path("./images/mockwatch-logs")]
    candidates.extend(Path("./images").glob("airflow/*"))

    for candidate in candidates:
        if not candidate.is_dir():
            continue
        if not (candidate / "requirements.txt").exists():
            continue
        create_venv(candidate)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions images/airflow/2.8.0/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.venv
1 change: 1 addition & 0 deletions images/airflow/2.8.0/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
run.sh
1 change: 1 addition & 0 deletions images/airflow/2.8.0/.pydocstyle
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[pydocstyle]
30 changes: 30 additions & 0 deletions images/airflow/2.8.0/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"files.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./.venv/bin/python"
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0

ENV AIRFLOW_VERSION=2.9.0

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV AIRFLOW_VERSION=2.8.0
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7
ENV PYTHON_VERSION=3.11.9

# We don't want those variables to stay in the final image, so we use ARG instead of ENV.
ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local
ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin
ARG PATH_DEFAULT=${PATH}
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_HOME}/.local/bin:${PATH_DEFAULT}
ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT}
ARG PYTHON_MD5_CHECKSUM=22ea467e7d915477152e99d5da856ddc
ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms
ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm
ARG MARIADB_RPM_COMMON_CHECKSUM=e87371d558efa97724f3728fb214cf19
Expand All @@ -36,7 +40,11 @@ RUN {{ filepath }}
# END marker for root user, first pass bootstrapping steps.
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


# Copy airflow user's bin/ directory before starting the bootstrapping steps for
# airflow user.
COPY ./bin/airflow-user $AIRFLOW_USER_LOCAL_BIN_PATH
RUN chmod -R +x ${AIRFLOW_USER_LOCAL_BIN_PATH}/* && \
chown -R airflow: ${AIRFLOW_USER_LOCAL_PATH}

#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# > BEGINNING marker for airflow user bootstrapping steps.
Expand Down Expand Up @@ -84,19 +92,18 @@ RUN rm -rf /bootstrap
# is created by the `001-create-mwaa-dir.sh` script.
VOLUME ["${MWAA_HOME}"]

# TODO We should only expose this port if the comand is 'webserver'.
# TODO We should only expose this port if the command is 'webserver'.
EXPOSE 8080

ENV PATH=${PATH_AIRFLOW_USER}
ENV PYTHONPATH="/python"
ENV PYTHONUNBUFFERED=1

WORKDIR ${AIRFLOW_USER_HOME}

COPY entrypoint.py /entrypoint.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

USER airflow

ENTRYPOINT ["/entrypoint.sh"]
# Copy python files.
COPY ./python /python

CMD /bin/bash
# TODO Move this to the bin folder under airflow's home folder.
COPY healthcheck.sh /healthcheck.sh
RUN chmod +x /healthcheck.sh
Loading
Loading