Extend gather_automation_controller_billing_data (#8)
Gather and ship billing data to console.redhat.com, automatically collecting any gap by storing a last
collected timestamp and always collecting from that last successfully collected timestamp
Ladas authored Jan 22, 2024
1 parent 5274bbd commit 6357e51
Showing 6 changed files with 60 additions and 54 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -7,3 +7,9 @@
## 0.0.2

- gather_automation_controller_billing_data command

## 0.0.3

- gather_automation_controller_billing_data command extension
  Adds the ability to run without --since specified, collecting any
  gap automatically.
9 changes: 9 additions & 0 deletions README.md
@@ -66,4 +66,13 @@ Gather and ship billing data to console.redhat.com for a dynamic datetime range:
# You need to set 'Red Hat customer username/password' under Automation Controller 'Miscellaneous System' settings
# This will collect and ship data for yesterday, interval <2 days ago, 1 day ago>
metrics-utility gather_automation_controller_billing_data --ship --since=2d --until=1d
```

Gather and ship billing data to console.redhat.com, automatically collecting any gap by storing a last
collected timestamp and always collecting from that last successfully collected timestamp. To be on the safe
side, we collect the interval <last_collected_timestamp_or_4_weeks_back, 10_minutes_ago> to give all records time to insert.
```
# You need to set 'Red Hat customer username/password' under Automation Controller 'Miscellaneous System' settings
# This will collect and ship data for interval <last_collected_timestamp_or_4_weeks_back, 10_minutes_ago>
metrics-utility gather_automation_controller_billing_data --ship --until=10m
```
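
The window computation can be pictured in a few lines of Python — a minimal sketch of the intended logic, not the shipped implementation; `billing_window` and `last_collected` are hypothetical names:
```
# Minimal sketch of the gap-collection window: resume from the last
# successfully collected timestamp, but never reach back more than 4 weeks,
# and stop 10 minutes ago so in-flight records have time to insert.
from datetime import datetime, timedelta, timezone

def billing_window(last_collected=None):
    until = datetime.now(timezone.utc) - timedelta(minutes=10)
    horizon = until - timedelta(weeks=4)
    since = max(last_collected, horizon) if last_collected else horizon
    return since, until
```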
57 changes: 23 additions & 34 deletions metrics_utility/automation_controller_billing/collector.py
@@ -6,13 +6,13 @@

import insights_analytics_collector as base

# from django.core.serializers.json import DjangoJSONEncoder
from django.core.serializers.json import DjangoJSONEncoder
# from awx.conf.license import get_license
# from awx.main.models import Job
# from awx.main.access import access_registry
# from rest_framework.exceptions import PermissionDenied
from metrics_utility.automation_controller_billing.package import Package
# from awx.main.utils import datetime_hook
from awx.main.utils import datetime_hook
from awx.main.utils.pglock import advisory_lock

logger = logging.getLogger('awx.main.analytics')
@@ -107,44 +107,33 @@ def _pg_advisory_lock(self, key, wait=False):
yield lock

def _last_gathering(self):
# TODO: fill in later, when integrated with consumption based billing in Controller

# return settings.AUTOMATION_ANALYTICS_LAST_GATHER
return {}
# Not needed in this implementation, but we need to define an abstract method
pass

def _load_last_gathered_entries(self):
# TODO: fill in later, when integrated with consumption based billing in Controller
# We are reusing Settings used by Analytics, so we don't have to backport changes into analytics
# We can safely do this, by making sure we use the same lock as Analytics, before we persist
# these settings.
from awx.conf.models import Setting

# from awx.conf.models import Setting
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_gathered_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
return last_gathered_entries

# last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
# last_gathered_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
# return last_gathered_entries

return {}
def _gather_finalize(self):
"""Persisting timestamps (manual/schedule mode only)"""
if self.is_shipping_enabled():
# We need to wait on the analytics lock to update the last collected timestamp
# settings, so we don't clash with the analytics job collection.
with self._pg_advisory_lock("gather_analytics_lock", wait=True) as acquired:
# We need to load fresh settings again after obtaining the lock, since the
# Analytics job could have changed them in the background and we'd otherwise
# be resetting the Analytics values here.
self._load_last_gathered_entries()
self._update_last_gathered_entries()

def _save_last_gathered_entries(self, last_gathered_entries):
# TODO: fill in later, when integrated with consumption based billing in Controller

# settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_gathered_entries, cls=DjangoJSONEncoder)
pass

def _save_last_gather(self):
# TODO: fill in later, when integrated with consumption based billing in Controller
# from awx.main.signals import disable_activity_stream

# with disable_activity_stream():
# if not settings.AUTOMATION_ANALYTICS_LAST_GATHER or self.gather_until > settings.AUTOMATION_ANALYTICS_LAST_GATHER:
# # `AUTOMATION_ANALYTICS_LAST_GATHER` is set whether collection succeeds or fails;
# # if collection fails because of a persistent, underlying issue and we do not set last_gather,
# # we risk the collectors hitting an increasingly greater workload while the underlying issue
# # remains unresolved. Put simply, if collection fails, we just move on.

# # All that said, `AUTOMATION_ANALYTICS_LAST_GATHER` plays a much smaller role in determining
# # what is actually collected than it used to; collectors now mostly rely on their respective entry
# # under `last_entries` to determine what should be collected.
# settings.AUTOMATION_ANALYTICS_LAST_GATHER = self.gather_until
pass
settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_gathered_entries, cls=DjangoJSONEncoder)

@staticmethod
def _package_class():
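
The finalize path above is a read-modify-write guarded by the same advisory lock Analytics uses; re-reading `AUTOMATION_ANALYTICS_LAST_ENTRIES` only after the lock is acquired is what keeps a concurrent Analytics gather from being clobbered. A simplified sketch of that pattern (assumptions: a `threading.Lock` stands in for pg's `advisory_lock`, a dict for the Setting row):
```
import json
import threading

gather_analytics_lock = threading.Lock()  # stand-in for advisory_lock("gather_analytics_lock")
setting = {"AUTOMATION_ANALYTICS_LAST_ENTRIES": "{}"}  # stand-in for the Setting row

def finalize(key, new_timestamp):
    with gather_analytics_lock:
        # Fresh read under the lock: a concurrent Analytics run may have
        # written new entries since this job started.
        entries = json.loads(setting["AUTOMATION_ANALYTICS_LAST_ENTRIES"] or "{}")
        entries[key] = new_timestamp  # update only this collector's key
        setting["AUTOMATION_ANALYTICS_LAST_ENTRIES"] = json.dumps(entries)
```
In the real code the dump goes through DjangoJSONEncoder and the load through awx's datetime_hook, so the datetime values survive the JSON round trip.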
26 changes: 13 additions & 13 deletions metrics_utility/automation_controller_billing/collectors.py
@@ -33,21 +33,21 @@ def something(since):
"""


def trivial_slicing(key, last_gather, **kwargs):
def trivial_slicing(key, _, **kwargs):
since, until = kwargs.get('since', None), kwargs.get('until', now())
if since is not None:
return [(since, until)]

return [(since, until)]
# TODO: load last collected timestamp once we support that path
# if since is not None:
# return [(since, until)]
from awx.conf.models import Setting

# from awx.conf.models import Setting

# horizon = until - timedelta(weeks=4)
# last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
# last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
# last_entry = max(last_entries.get(key) or last_gather, horizon)
# return [(last_entry, until)]
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
if last_entries.get(key):
last_entry = max(last_entries.get(key), horizon)
else:
last_entry = horizon
return [(last_entry, until)]

# TODO: implement daily slicing for billing collection?
# def four_hour_slicing(key, last_gather, **kwargs):
@@ -149,7 +149,7 @@ def _copy_table_aap_2_5_and_above(cursor, query, file):


@register('job_host_summary', '1.0', format='csv', description=_('Data for billing'), fnc_slicing=trivial_slicing)
def unified_jobs_table(since, full_path, until, **kwargs):
def job_host_summary_table(since, full_path, until, **kwargs):
# TODO: controller needs to have an index on main_jobhostsummary.modified
query = '''
(SELECT main_jobhostsummary.id,
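
A worked example of the new `trivial_slicing` fallback, using assumed timestamps:
```
# Worked example of the trivial_slicing fallback (timestamps are assumptions).
from datetime import datetime, timedelta, timezone

until = datetime(2024, 1, 22, 12, 0, tzinfo=timezone.utc)
horizon = until - timedelta(weeks=4)  # 2023-12-25 12:00 UTC

# Last entry newer than the horizon: resume exactly where we left off.
recent = datetime(2024, 1, 20, tzinfo=timezone.utc)
assert max(recent, horizon) == recent

# Last entry missing or older than 4 weeks: clamp to the horizon.
stale = datetime(2023, 11, 1, tzinfo=timezone.utc)
assert max(stale, horizon) == horizon
```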
@@ -53,9 +53,12 @@ def handle(self, *args, **options):

# Process since argument
since = None
if opt_since.endswith('d'):
if opt_since and opt_since.endswith('d'):
days_ago = int(opt_since[0:-1])
since = (datetime.datetime.now() - datetime.timedelta(days=days_ago-1)).replace(hour=0, minute=0, second=0, microsecond=0)
elif opt_since and opt_since.endswith('m'):
minutes_ago = int(opt_since[0:-1])
since = (datetime.datetime.now() - datetime.timedelta(minutes=minutes_ago))
else:
since = parser.parse(opt_since) if opt_since else None
# Add default utc timezone
@@ -64,19 +67,18 @@

# Process until argument
until = None
if opt_until.endswith('d'):
if opt_until and opt_until.endswith('d'):
days_ago = int(opt_until[0:-1])
until = (datetime.datetime.now() - datetime.timedelta(days=days_ago-1)).replace(hour=0, minute=0, second=0, microsecond=0)
elif opt_until and opt_until.endswith('m'):
minutes_ago = int(opt_until[0:-1])
until = (datetime.datetime.now() - datetime.timedelta(minutes=minutes_ago))
else:
until = parser.parse(opt_until) if opt_until else None
# Add default utc timezone
if until and until.tzinfo is None:
until = until.replace(tzinfo=timezone.utc)

if since is None or until is None:
self.logger.error('Both --since and --until arguments must be passed')
return

if opt_ship and opt_dry_run:
self.logger.error('Arguments --ship and --dry-run cannot be processed at the same time, set only one of these.')
return
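
The relative-offset handling is worth a worked example: `d` values are offset by one day and snapped to midnight, so `--since=2d --until=1d` covers exactly yesterday, while the new `m` suffix is a plain minutes-ago offset. A standalone sketch mirroring the command's logic (`parse_offset` is a hypothetical helper):
```
# Sketch of the command's suffix handling; parse_offset is hypothetical.
import datetime
from dateutil import parser

def parse_offset(opt):
    now = datetime.datetime.now()
    if opt and opt.endswith('d'):
        # 'd' snaps to midnight, offset by one day: '2d' -> start of yesterday.
        days_ago = int(opt[:-1])
        return (now - datetime.timedelta(days=days_ago - 1)).replace(
            hour=0, minute=0, second=0, microsecond=0)
    if opt and opt.endswith('m'):
        return now - datetime.timedelta(minutes=int(opt[:-1]))
    return parser.parse(opt) if opt else None

since, until = parse_offset('2d'), parse_offset('1d')  # yesterday's interval
```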
2 changes: 1 addition & 1 deletion setup.cfg
@@ -2,7 +2,7 @@
name = metrics_utility
author = Red Hat
author_email = [email protected]
version = 0.0.2
version = 0.0.3

[options]
packages = find:
