Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging dev-rafidka into main #41

Merged
merged 12 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"folders": [
{
"name": "amazon-mwaa-docker-images",
"path": "./"
},
{
"name": "airflow-2.8.0",
"path": "./images/airflow/2.8.0"
}
],
"settings": {}
}
18 changes: 0 additions & 18 deletions .github/workflows/bash-lint.yaml

This file was deleted.

23 changes: 0 additions & 23 deletions .github/workflows/python-lint.yaml

This file was deleted.

46 changes: 46 additions & 0 deletions .github/workflows/quality-checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: Quality Checks Workflow

on: [push, pull_request]

jobs:
quality_checks:
name: Quality Checks Job
runs-on: ubuntu-latest

container:
image: public.ecr.aws/amazonlinux/amazonlinux:2023

steps:
- name: Install required packages...
run: |
# Update packages and install required dependencies:
# gcc, libcurl-devel: For compiling pycurl (required for our Airflow setup.)
# gzip: Required by actions/checkout@v2 to gunzip the source code.
# postgresql-devel: Required for our Airflow setup.
# python3-devel: Required for building some Python modules, e.g. pycurl.
# python3: Self explanatory.
# tar, wget, xz: For downloading and extracting ShellCheck
dnf update -y
dnf install -y \
gcc \
gzip \
libcurl-devel \
postgresql-devel \
python3 \
python3-devel \
tar \
wget \
xz
# Download and install shellcheck for linting shell scripts
wget https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz
tar -xvf shellcheck-stable.linux.x86_64.tar.xz
cp shellcheck-stable/shellcheck /usr/bin/

- name: Check out code...
uses: actions/checkout@v2

- name: Create the necessary Python virtual environments...
run: python3 ./create_venvs.py

- name: Run quality checks...
run: python3 ./quality-checks/run_all.py
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
venv
.venv
18 changes: 3 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,8 @@
repos:
- repo: https://github.com/hhatto/autopep8
rev: v2.0.4
hooks:
- id: autopep8
args: ["--in-place", "--aggressive", "--aggressive"]

- repo: local
hooks:
- id: lint-bash
name: Lint Bash Scripts
entry: ./lint_bash.sh
- id: quality-checks
name: Code Quality Checks
entry: ./quality-checks/run_all.py
language: script
types: [shell]

- id: lint-python
name: Lint Python Scripts
entry: ./lint_python.sh
language: script
types: [python]
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Generate Airflow Dockerfiles",
"type": "debugpy",
"request": "launch",
"program": "./images/airflow/generate-dockerfiles.py",
"console": "integratedTerminal"
}
]
}
30 changes: 30 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"files.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./.venv/bin/python"
}
68 changes: 68 additions & 0 deletions create_venvs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import subprocess
import sys
import venv
from pathlib import Path


def verify_python_version():
    """Exit the script with status 1 if the running Python is older than 3.9.

    The comparison uses the full ``sys.version_info`` tuple rather than only
    the minor component, so a hypothetical Python 4.0 (minor == 0) would not
    be rejected by mistake.
    """
    if sys.version_info < (3, 9):
        print("Python 3.9 or higher is required.")
        sys.exit(1)


def create_venv(path: Path):
    """Create a ``.venv`` virtual environment under *path* and install
    dependencies from ``requirements.txt`` if one is present.

    If the ``.venv`` directory already exists, creation is skipped, but the
    requirements installation is still attempted (it is a no-op when the
    requirements file is missing).

    :param path: The directory in which to create the venv.
    """
    venv_path = path / ".venv"

    if not venv_path.exists():
        print(f"Creating virtualenv in directory: {path}")
        venv.create(venv_path, with_pip=True)
    else:
        print(f"Virtualenv already exists in {venv_path}")

    requirements_path = path / "requirements.txt"
    pip_install(venv_path, requirements_path)


def pip_install(venv_dir: Path, requirements_file: Path):
    """Install dependencies from *requirements_file* into the given venv.

    This is a no-op when the requirements file does not exist.

    :param venv_dir: The path to the venv directory.
    :param requirements_file: The path to the requirements.txt file.

    :raises subprocess.CalledProcessError: If pip exits with a non-zero
        status (``check=True``).
    """
    # Guard clause: nothing to install without a requirements file.
    if not requirements_file.exists():
        return
    print(f"Installing dependencies from {requirements_file}...")
    subprocess.run(
        [
            # Use the venv's own interpreter so packages land in the venv.
            str(venv_dir / "bin" / "python"),
            "-m",
            "pip",
            "install",
            "-U",
            "-r",
            str(requirements_file),
            "pip",  # Upgrade pip as well.
        ],
        check=True,
    )


def main():
    """Entry point: set up a venv for every project directory that needs one.

    Considers the repository root plus each image directory matching
    ``./images/airflow/*``, and creates a venv only where a
    ``requirements.txt`` file exists.
    """
    verify_python_version()
    candidates = [Path("."), *Path("./images").glob("airflow/*")]
    for candidate in candidates:
        if candidate.is_dir() and (candidate / "requirements.txt").exists():
            create_venv(candidate)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions images/airflow/2.8.0/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.venv
1 change: 1 addition & 0 deletions images/airflow/2.8.0/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
run.sh
30 changes: 30 additions & 0 deletions images/airflow/2.8.0/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"files.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./.venv/bin/python"
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0

# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image
# within Amazon MWAA since 2.7.2 is a version we support.
ENV AIRFLOW_VERSION=2.7.2
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV AIRFLOW_VERSION=2.8.0
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7

# We don't want those variables to stay in the final image, so we use ARG instead of ENV.
ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local
ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin
ARG PATH_DEFAULT=${PATH}
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_HOME}/.local/bin:${PATH_DEFAULT}
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT}
ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c
ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms
ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm
Expand All @@ -36,7 +43,11 @@ RUN {{ filepath }}
# END marker for root user, first pass bootstrapping steps.
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


# Copy airflow user's bin/ directory before starting the bootstrapping steps for
# airflow user.
COPY ./bin/airflow-user $AIRFLOW_USER_LOCAL_BIN_PATH
RUN chmod -R +x ${AIRFLOW_USER_LOCAL_BIN_PATH}/* && \
chown -R airflow: ${AIRFLOW_USER_LOCAL_PATH}

#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# > BEGINNING marker for airflow user bootstrapping steps.
Expand Down Expand Up @@ -84,19 +95,18 @@ RUN rm -rf /bootstrap
# is created by the `001-create-mwaa-dir.sh` script.
VOLUME ["${MWAA_HOME}"]

# TODO We should only expose this port if the comand is 'webserver'.
# TODO We should only expose this port if the command is 'webserver'.
EXPOSE 8080

ENV PATH=${PATH_AIRFLOW_USER}
ENV PYTHONPATH="/python"
ENV PYTHONUNBUFFERED=1

WORKDIR ${AIRFLOW_USER_HOME}

COPY entrypoint.py /entrypoint.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

USER airflow

ENTRYPOINT ["/entrypoint.sh"]
# Copy python files.
COPY ./python /python

CMD /bin/bash
# TODO Move this to the bin folder under airflow's home folder.
COPY healthcheck.sh /healthcheck.sh
RUN chmod +x /healthcheck.sh
Loading
Loading