Skip to content

Commit

Permalink
🎨 Adding tracing to agent and dynamic-sidecar (🏗️ DEVOPS) (ITISFoundation#6691)
Browse files Browse the repository at this point in the history

Co-authored-by: Andrei Neagu <[email protected]>
  • Loading branch information
GitHK and Andrei Neagu authored Nov 14, 2024
1 parent 0781e63 commit 9e6ca99
Show file tree
Hide file tree
Showing 12 changed files with 33 additions and 0 deletions.
1 change: 1 addition & 0 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ AGENT_VOLUMES_CLEANUP_S3_ENDPOINT=http://172.17.0.1:9001
AGENT_VOLUMES_CLEANUP_S3_PROVIDER=MINIO
AGENT_VOLUMES_CLEANUP_S3_REGION=us-east-1
AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY=12345678
AGENT_TRACING={}

API_SERVER_DEV_FEATURES_ENABLED=0
API_SERVER_LOGLEVEL=INFO
Expand Down
4 changes: 4 additions & 0 deletions services/agent/src/simcore_service_agent/core/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
get_common_oas_options,
override_fastapi_openapi_method,
)
from servicelib.fastapi.tracing import setup_tracing
from servicelib.logging_utils import config_all_loggers

from .._meta import (
Expand Down Expand Up @@ -59,6 +60,9 @@ def create_app() -> FastAPI:
setup_rest_api(app)
setup_rpc_api_routes(app)

if settings.AGENT_TRACING:
setup_tracing(app, settings.AGENT_TRACING, APP_NAME)

async def _on_startup() -> None:
print(APP_STARTED_BANNER_MSG, flush=True) # noqa: T201

Expand Down
5 changes: 5 additions & 0 deletions services/agent/src/simcore_service_agent/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from settings_library.base import BaseCustomSettings
from settings_library.r_clone import S3Provider
from settings_library.rabbit import RabbitSettings
from settings_library.tracing import TracingSettings
from settings_library.utils_logging import MixinLoggingSettings


Expand Down Expand Up @@ -77,6 +78,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
auto_default_from_env=True, description="settings for service/rabbitmq"
)

AGENT_TRACING: TracingSettings | None = Field(
auto_default_from_env=True, description="settings for opentelemetry tracing"
)

@validator("LOGLEVEL")
@classmethod
def valid_log_level(cls, value) -> LogLevel:
Expand Down
1 change: 1 addition & 0 deletions services/agent/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def mock_environment(
"RABBIT_SECURE": "false",
"RABBIT_USER": "test",
"AGENT_DOCKER_NODE_ID": docker_node_id,
"AGENT_TRACING": "null",
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ def _get_environment_variables(
"S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY,
"SC_BOOT_MODE": f"{app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_SC_BOOT_MODE}",
"SSL_CERT_FILE": app_settings.DIRECTOR_V2_SELF_SIGNED_SSL_FILENAME,
"DYNAMIC_SIDECAR_TRACING": (
app_settings.DIRECTOR_V2_TRACING.json()
if app_settings.DIRECTOR_V2_TRACING
else "null"
),
# For background info on the special SSL_CERT_FILE env-var set above, see
# - https://stackoverflow.com/questions/31448854/how-to-force-requests-use-the-certificates-on-my-ubuntu-system#comment78596389_37447847
"SIMCORE_HOST_NAME": scheduler_data.service_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"DY_SIDECAR_USER_SERVICES_HAVE_INTERNET_ACCESS",
"DYNAMIC_SIDECAR_COMPOSE_NAMESPACE",
"DYNAMIC_SIDECAR_LOG_LEVEL",
"DYNAMIC_SIDECAR_TRACING",
"NODE_PORTS_400_REQUEST_TIMEOUT_ATTEMPTS",
"POSTGRES_DB",
"POSTGRES_ENDPOINT",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def expected_dynamic_sidecar_spec(
"FORWARD_ENV_DISPLAY": ":0",
"NODE_PORTS_400_REQUEST_TIMEOUT_ATTEMPTS": "3",
"DYNAMIC_SIDECAR_LOG_LEVEL": "DEBUG",
"DYNAMIC_SIDECAR_TRACING": "null",
"DY_DEPLOYMENT_REGISTRY_SETTINGS": (
'{"REGISTRY_AUTH": false, "REGISTRY_PATH": null, '
'"REGISTRY_URL": "foo.bar.com", "REGISTRY_USER": '
Expand Down
1 change: 1 addition & 0 deletions services/docker-compose-ops.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ services:
- "4318:4318" # OTLP HTTP receiver
networks:
- simcore_default
- interactive_services_subnet
environment:
TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE}
TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE}
Expand Down
4 changes: 4 additions & 0 deletions services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,10 @@ services:
RABBIT_USER: ${RABBIT_USER}
RABBIT_SECURE: ${RABBIT_SECURE}

AGENT_TRACING: ${AGENT_TRACING}
TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT}
TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT}

dask-sidecar:
image: ${DOCKER_REGISTRY:-itisfoundation}/dask-sidecar:${DOCKER_IMAGE_TAG:-latest}
init: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
get_common_oas_options,
override_fastapi_openapi_method,
)
from servicelib.fastapi.tracing import setup_tracing
from servicelib.logging_utils import config_all_loggers
from simcore_sdk.node_ports_common.exceptions import NodeNotFound

Expand Down Expand Up @@ -190,6 +191,9 @@ def create_app():
if application_settings.are_prometheus_metrics_enabled:
setup_prometheus_metrics(app)

if application_settings.DYNAMIC_SIDECAR_TRACING:
setup_tracing(app, application_settings.DYNAMIC_SIDECAR_TRACING, PROJECT_NAME)

# ERROR HANDLERS ------------
app.add_exception_handler(NodeNotFound, node_not_found_error_handler)
app.add_exception_handler(BaseDynamicSidecarError, http_error_handler)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from settings_library.resource_usage_tracker import (
DEFAULT_RESOURCE_USAGE_HEARTBEAT_INTERVAL,
)
from settings_library.tracing import TracingSettings
from settings_library.utils_logging import MixinLoggingSettings


Expand Down Expand Up @@ -167,6 +168,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):

SYSTEM_MONITOR_SETTINGS: SystemMonitorSettings = Field(auto_default_from_env=True)

DYNAMIC_SIDECAR_TRACING: TracingSettings | None = Field(
auto_default_from_env=True, description="settings for opentelemetry tracing"
)

@property
def are_prometheus_metrics_enabled(self) -> bool:
return self.DY_SIDECAR_CALLBACKS_MAPPING.metrics is not None
Expand Down
1 change: 1 addition & 0 deletions services/dynamic-sidecar/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def base_mock_envs(
"REGISTRY_SSL": "false",
}
),
"DYNAMIC_SIDECAR_TRACING": "null",
}


Expand Down

0 comments on commit 9e6ca99

Please sign in to comment.