Skip to content

Commit

Permalink
feat: ops[tracing]
Browse files Browse the repository at this point in the history
  • Loading branch information
dimaqq committed Jan 27, 2025
1 parent b1ea782 commit 3c060af
Show file tree
Hide file tree
Showing 21 changed files with 966 additions and 16 deletions.
12 changes: 12 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ click==8.1.8
# via uvicorn
colorama==0.4.6
# via sphinx-autobuild
deprecated==1.2.15
# via opentelemetry-api
docutils==0.21.2
# via
# canonical-sphinx-extensions
Expand All @@ -51,6 +53,10 @@ idna==3.10
# requests
imagesize==1.4.1
# via sphinx
importlib-metadata==8.5.0
# via
# opentelemetry-api
# ops (pyproject.toml)
jinja2==3.1.5
# via
# myst-parser
Expand All @@ -73,6 +79,8 @@ mdurl==0.1.2
# via markdown-it-py
myst-parser==4.0.0
# via ops (pyproject.toml)
opentelemetry-api==1.29.0
# via ops (pyproject.toml)
packaging==24.2
# via sphinx
pygments==2.19.1
Expand Down Expand Up @@ -165,4 +173,8 @@ websocket-client==1.8.0
# via ops (pyproject.toml)
websockets==14.1
# via sphinx-autobuild
wrapt==1.17.2
# via deprecated
zipp==3.21.0
# via importlib-metadata
./testing/
61 changes: 61 additions & 0 deletions dont-merge/fake-charm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# Copyright 2025 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""FIXME dummy_load docstring."""

from __future__ import annotations

import time

import opentelemetry.trace

import ops

# Module-level tracer named after this module; used to decorate FakeCharm.dummy_load.
tracer = opentelemetry.trace.get_tracer(__name__)


class FakeCharm(ops.CharmBase):
    """Throwaway charm that records a few spans for manual tracing tests.

    It observes ``start`` and the two collect-status events. Every handler
    calls :meth:`dummy_load`, which runs inside its own span.
    """

    def __init__(self, framework: ops.Framework):
        """Register the handlers for the events this charm reacts to."""
        super().__init__(framework)
        observe = self.framework.observe
        observe(self.on.start, self._on_start)
        observe(self.on.collect_app_status, self._on_collect_app_status)
        observe(self.on.collect_unit_status, self._on_collect_unit_status)

    def _on_start(self, event: ops.StartEvent) -> None:
        """Set the tracing destination, then run a longer-than-default dummy load."""
        destination = 'http://localhost:4318/v1/traces'
        ops.configure_tracing_destination(destination)
        self.dummy_load(event, 0.0025)

    def _on_collect_app_status(self, event: ops.CollectStatusEvent) -> None:
        """Run the dummy load, then report the application as active."""
        self.dummy_load(event)
        app_status = ops.ActiveStatus('app seems ready')
        event.add_status(app_status)

    def _on_collect_unit_status(self, event: ops.CollectStatusEvent) -> None:
        """Run the dummy load, then report the unit as active."""
        self.dummy_load(event)
        unit_status = ops.ActiveStatus('unit ready')
        event.add_status(unit_status)

    @tracer.start_as_current_span('FakeCharm.dummy_load')  # type: ignore
    def dummy_load(self, event: ops.EventBase, duration: float = 0.001) -> None:
        """Print ``event`` and sleep for ``duration`` seconds inside a span."""
        print(event)
        time.sleep(duration)


if __name__ == '__main__':
    # Hand the charm class to ops.main when this file is run as a script.
    ops.main(FakeCharm)
1 change: 1 addition & 0 deletions dont-merge/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
name: testmetest
25 changes: 25 additions & 0 deletions dont-merge/otel-collector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
receivers:
otlp:
protocols:
grpc:
endpoint: "[::]:4317"
http:
endpoint: "[::]:4318"

processors:
batch:

exporters:
debug:
verbosity: detailed
jaeger:
endpoint: jaeger:14250
tls:
insecure: true

service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [debug]
114 changes: 114 additions & 0 deletions dont-merge/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
### Usage

Recommended for traces of moderate and high complexity:

```command
dima@colima-ahh /c/operator (feat-otel)> docker run --rm --name jaeger \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
-p 5778:5778 \
-p 9411:9411 \
jaegertracing/jaeger:2.2.0
```

After which, you should be able to:
- open the Jaeger UI at http://192.168.107.4:16686/ in your browser (substitute the address of your own Docker host, e.g. `localhost`)
- select the correct **Service** (`testapp-charm` at current branch state)
- click Search at the bottom of the form

Note: the `jaeger` container keeps traces in memory, and your Service can't be selected
until it has sent some data to `jaeger`.

Alternatively, text-based:

```command
dima@colima-ahh /c/operator (feat-otel)> docker run -it --rm \
-v (pwd)/dont-merge/otel-collector-config.yaml:/etc/otel-collector-config.yaml \
-p 4317:4317 \
-p 4318:4318 \
otel/opentelemetry-collector:latest \
--config=/etc/otel-collector-config.yaml
```

and then

```command
dima@colima-ahh /c/operator (feat-otel)> uv venv --seed .ahh-venv
Using CPython 3.13.0
Creating virtual environment with seed packages at: .ahh-venv

dima@colima-ahh /c/operator (feat-otel)> . .ahh-venv/bin/activate.fish
(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)>

(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)> uv pip install -e .[tracing] -U
Using Python 3.13.0 environment at .ahh-venv
Resolved 21 packages in 907ms
Prepared 18 packages in 72ms
...

(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)> python dont-merge/send-traces.py
INFO:__main__:Span created and will be exported to the collector soon!
```

### Hacking

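This section covers running the charm code outside of a real charm environment.

At first nothing shows up, because the `juju-log` hook tool is missing.

Work around that by providing a stand-in `juju-log` that simply echoes its arguments: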

```command
> ln -s (which echo) juju-log
```

Generate some tracing data:

```command
(venv) > JUJU_UNIT_NAME=testapp/42 JUJU_CHARM_DIR=dont-merge/ PATH=$PATH:. JUJU_VERSION=3.5.4 ./dont-merge/start
```

OTEL collector debug output would look like this:

```
2025-01-15T08:46:23.229Z info Traces {"kind": "exporter", "data_type": "traces", "name": "debug", "resource spans": 1, "spans": 1}
2025-01-15T08:46:23.229Z info ResourceSpans #0
Resource SchemaURL:
Resource attributes:
-> telemetry.sdk.language: Str(python)
-> telemetry.sdk.name: Str(opentelemetry)
-> telemetry.sdk.version: Str(1.29.0)
-> service.name: Str(testapp-charm)
-> compose_service: Str(testapp-charm)
-> charm_type: Str(CharmBase)
-> juju_unit: Str(testapp/42)
-> juju_application: Str(testapp)
-> juju_model: Str()
-> juju_model_uuid: Str()
ScopeSpans #0
ScopeSpans SchemaURL:
InstrumentationScope ops
Span #0
Trace ID : 8c3f292c89f29c59f1b37fe59ba0abbc
Parent ID :
ID : e0253a03ef694a4f
Name : ops.main
Kind : Internal
Start time : 2025-01-15 08:46:23.175916835 +0000 UTC
End time : 2025-01-15 08:46:23.182329655 +0000 UTC
Status code : Error
Status message : RuntimeError: command not found: is-leader
Events:
SpanEvent #0
-> Name: exception
-> Timestamp: 2025-01-15 08:46:23.182316071 +0000 UTC
-> DroppedAttributesCount: 0
-> Attributes::
-> exception.type: Str(RuntimeError)
-> exception.message: Str(command not found: is-leader)
-> exception.stacktrace: Str(Traceback (most recent call last):
...
-> exception.escaped: Str(False)
{"kind": "exporter", "data_type": "traces", "name": "debug"}
```
63 changes: 63 additions & 0 deletions dont-merge/send-traces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2025 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""FIXME dummy docstring."""

from __future__ import annotations

import logging

import opentelemetry.trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# The default ProxyTracer allows tracers to be declared ahead of time like loggers
logger = logging.getLogger(__name__)
tracer = opentelemetry.trace.get_tracer(__name__)

# 1. Create a tracer provider with a "service.name" resource attribute.
#    Keep a reference to the SDK provider: add_span_processor() is defined on the
#    SDK class, not on the API-level type returned by get_tracer_provider(), so
#    using the reference directly needs no "type: ignore".
provider = TracerProvider(resource=Resource.create({'service.name': 'example-service'}))
opentelemetry.trace.set_tracer_provider(provider)

# 2. Configure the OTLP HTTP exporter (defaults to protobuf format)
otlp_exporter = OTLPSpanExporter(
    # Trailing comma so that the optional arguments below can simply be uncommented.
    endpoint='http://localhost:4318/v1/traces',
    # If you needed headers or auth, you could add them like:
    # headers={"Authorization": "Bearer <TOKEN>"},
)

# 3. Create a span processor (BatchSpanProcessor recommended for production)
span_processor = BatchSpanProcessor(otlp_exporter)
provider.add_span_processor(span_processor)


@tracer.start_as_current_span('some label')  # type: ignore
def main(foo: int = 42):
    """Log one message while the ``'some label'`` span is current.

    ``foo`` is accepted but not used.
    """
    # The decorator form gives no handle on the span, so attributes and events
    # cannot be attached here. When they are needed, use the context-manager
    # form instead:
    #
    #     with tracer.start_as_current_span("some label") as span:
    #         span.set_attribute('foo', 'bar')
    #         span.add_event('sample_event', {'event_attr': 123})
    message = 'Span created and will be exported to the collector soon!'
    logger.info(message)


if __name__ == '__main__':
    # Configure logging first so the INFO message logged by main() is displayed.
    logging.basicConfig(level='INFO')
    main()
    # Debugging aid for inspecting the decorated type of main():
    # from typing_extensions import reveal_type
    # reveal_type(main)
7 changes: 7 additions & 0 deletions ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
# that those symbols are part of the public API, so we have to add __all__.
__all__ = [ # noqa: RUF022 `__all__` is not sorted
'__version__',
'configure_tracing_buffer',
'configure_tracing_destination',
'main',
'pebble',
# From charm.py
Expand Down Expand Up @@ -333,6 +335,11 @@
# NOTE: don't import testing or Harness here, as that's a test-time concern
# rather than a runtime concern.

from .tracing import (
configure_tracing_buffer,
configure_tracing_destination,
)

from .version import version as __version__


Expand Down
21 changes: 18 additions & 3 deletions ops/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast

import opentelemetry.trace

import ops.charm
import ops.framework
import ops.model
import ops.storage
import ops.tracing
from ops.charm import CharmMeta
from ops.jujucontext import _JujuContext
from ops.log import setup_root_logging
Expand All @@ -35,6 +38,7 @@


logger = logging.getLogger()
tracer = opentelemetry.trace.get_tracer(__name__)


def _exe_path(path: Path) -> Optional[Path]:
Expand Down Expand Up @@ -212,6 +216,8 @@ class _Dispatcher:
"""

event_name: str

def __init__(self, charm_dir: Path, juju_context: _JujuContext):
self._juju_context = juju_context
self._charm_dir = charm_dir
Expand Down Expand Up @@ -268,7 +274,9 @@ def run_any_legacy_hook(self):
argv[0] = str(dispatch_path)
logger.info('Running legacy %s.', self._dispatch_path)
try:
subprocess.run(argv, check=True)
with tracer.start_as_current_span('ops.run_legacy_hook') as span: # type: ignore
span.set_attribute('argv', ' '.join(argv)) # type: ignore
subprocess.run(argv, check=True)
except subprocess.CalledProcessError as e:
logger.warning('Legacy %s exited with status %d.', self._dispatch_path, e.returncode)
raise _Abort(e.returncode) from e
Expand Down Expand Up @@ -552,9 +560,16 @@ def main(charm_class: Type[ops.charm.CharmBase], use_juju_for_storage: Optional[
See `ops.main() <#ops-main-entry-point>`_ for details.
"""
ops.tracing.setup_tracing(charm_class.__name__)

# opentelemetry-api types are broken
# https://github.com/open-telemetry/opentelemetry-python/issues/3836
try:
manager = _Manager(charm_class, use_juju_for_storage=use_juju_for_storage)
with tracer.start_as_current_span('ops.main'): # type: ignore
manager = _Manager(charm_class, use_juju_for_storage=use_juju_for_storage)

manager.run()
manager.run()
except _Abort as e:
sys.exit(e.exit_code)
finally:
ops.tracing.shutdown_tracing()
Loading

0 comments on commit 3c060af

Please sign in to comment.