From 6357e5188eadb0061d3713a3a06e35ca7d482b5a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Mon, 22 Jan 2024 12:01:52 +0100 Subject: [PATCH] Extend gather_automation_controller_billing_data (#8) Gather and ship billing data to console.redhat.com with automatically collecting gap, by storing a last collected timestamp and always collecting from that last successfully collected timestamp --- CHANGELOG.md | 6 ++ README.md | 9 +++ .../collector.py | 57 ++++++++----------- .../collectors.py | 26 ++++----- ...ther_automation_controller_billing_data.py | 14 +++-- setup.cfg | 2 +- 6 files changed, 60 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c336f4..736901e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,3 +7,9 @@ ## 0.0.2 - gather_automation_controller_billing_data command + +## 0.0.3 + +- gather_automation_controller_billing_data command extension + Adding ability to run without "since specified", collecting any + gap automatically. diff --git a/README.md b/README.md index e13e0a4..499ece3 100644 --- a/README.md +++ b/README.md @@ -66,4 +66,13 @@ Gather and ship billing data to console.redhat.com for a dynamic datetime range: # You need to set 'Red Hat customer username/password' under Automation Controller 'Miscellaneous System' settings # This will collect and ship data for yesterday, interval <2 days ago, 1 day ago> metrics-utility gather_automation_controller_billing_data --ship --since=2d --until=1d +``` + +Gather and ship billing data to console.redhat.com with automatically collecting gap, by storing a last collected +timestamp and always collecting from that last successfully collected timestamp. To be on the safe side, we can +collect interval to give all records time to insert. 
+``` +# You need to set 'Red Hat customer username/password' under Automation Controller 'Miscellaneous System' settings +# This will collect and ship data for interval +metrics-utility gather_automation_controller_billing_data --ship --until=10m ``` \ No newline at end of file diff --git a/metrics_utility/automation_controller_billing/collector.py b/metrics_utility/automation_controller_billing/collector.py index 25310a3..dc11fbc 100644 --- a/metrics_utility/automation_controller_billing/collector.py +++ b/metrics_utility/automation_controller_billing/collector.py @@ -6,13 +6,13 @@ import insights_analytics_collector as base -# from django.core.serializers.json import DjangoJSONEncoder +from django.core.serializers.json import DjangoJSONEncoder # from awx.conf.license import get_license # from awx.main.models import Job # from awx.main.access import access_registry # from rest_framework.exceptions import PermissionDenied from metrics_utility.automation_controller_billing.package import Package -# from awx.main.utils import datetime_hook +from awx.main.utils import datetime_hook from awx.main.utils.pglock import advisory_lock logger = logging.getLogger('awx.main.analytics') @@ -107,44 +107,33 @@ def _pg_advisory_lock(self, key, wait=False): yield lock def _last_gathering(self): - # TODO: fill in later, when integrated with consumption based billing in Controller - - # return settings.AUTOMATION_ANALYTICS_LAST_GATHER - return {} + # Not needed in this implementation, but we need to define an abstract method + pass def _load_last_gathered_entries(self): - # TODO: fill in later, when integrated with consumption based billing in Controller + # We are reusing Settings used by Analytics, so we don't have to backport changes into analytics + # We can safely do this, by making sure we use the same lock as Analytics, before we persist + # these settings. 
+ from awx.conf.models import Setting - # from awx.conf.models import Setting + last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() + last_gathered_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) + return last_gathered_entries - # last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() - # last_gathered_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) - # return last_gathered_entries - - return {} + def _gather_finalize(self): + """Persisting timestamps (manual/schedule mode only)""" + if self.is_shipping_enabled(): + # We need to wait on analytics lock, to update the last collected timestamp settings + # so we don't clash with analytics job collection. + with self._pg_advisory_lock("gather_analytics_lock", wait=True) as acquired: + # We need to load fresh settings again as we're obtaining the lock, since + # Analytics job could have changed this in the background and we'd be resetting + # the Analytics values here. 
+ self._load_last_gathered_entries() + self._update_last_gathered_entries() def _save_last_gathered_entries(self, last_gathered_entries): - # TODO: fill in later, when integrated with consumption based billing in Controller - - # settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_gathered_entries, cls=DjangoJSONEncoder) - pass - - def _save_last_gather(self): - # TODO: fill in later, when integrated with consumption based billing in Controller - # from awx.main.signals import disable_activity_stream - - # with disable_activity_stream(): - # if not settings.AUTOMATION_ANALYTICS_LAST_GATHER or self.gather_until > settings.AUTOMATION_ANALYTICS_LAST_GATHER: - # # `AUTOMATION_ANALYTICS_LAST_GATHER` is set whether collection succeeds or fails; - # # if collection fails because of a persistent, underlying issue and we do not set last_gather, - # # we risk the collectors hitting an increasingly greater workload while the underlying issue - # # remains unresolved. Put simply, if collection fails, we just move on. - - # # All that said, `AUTOMATION_ANALYTICS_LAST_GATHER` plays a much smaller role in determining - # # what is actually collected than it used to; collectors now mostly rely on their respective entry - # # under `last_entries` to determine what should be collected. 
- # settings.AUTOMATION_ANALYTICS_LAST_GATHER = self.gather_until - pass + settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_gathered_entries, cls=DjangoJSONEncoder) @staticmethod def _package_class(): diff --git a/metrics_utility/automation_controller_billing/collectors.py b/metrics_utility/automation_controller_billing/collectors.py index f45349c..011f098 100644 --- a/metrics_utility/automation_controller_billing/collectors.py +++ b/metrics_utility/automation_controller_billing/collectors.py @@ -33,21 +33,21 @@ def something(since): """ -def trivial_slicing(key, last_gather, **kwargs): +def trivial_slicing(key, _, **kwargs): since, until = kwargs.get('since', None), kwargs.get('until', now()) + if since is not None: + return [(since, until)] - return [(since, until)] - # TODO: load last collected timestamp once we support that path - # if since is not None: - # return [(since, until)] + from awx.conf.models import Setting - # from awx.conf.models import Setting - - # horizon = until - timedelta(weeks=4) - # last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() - # last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) - # last_entry = max(last_entries.get(key) or last_gather, horizon) - # return [(last_entry, until)] + horizon = until - timedelta(weeks=4) + last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() + last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) + if last_entries.get(key): + last_entry = max(last_entries.get(key), horizon) + else: + last_entry = horizon + return [(last_entry, until)] # TODO: implement daily slicing for billing collection? 
# def four_hour_slicing(key, last_gather, **kwargs): @@ -149,7 +149,7 @@ def _copy_table_aap_2_5_and_above(cursor, query, file): @register('job_host_summary', '1.0', format='csv', description=_('Data for billing'), fnc_slicing=trivial_slicing) -def unified_jobs_table(since, full_path, until, **kwargs): +def job_host_summary_table(since, full_path, until, **kwargs): # TODO: controler needs to have an index on main_jobhostsummary.modified query = ''' (SELECT main_jobhostsummary.id, diff --git a/metrics_utility/management/commands/gather_automation_controller_billing_data.py b/metrics_utility/management/commands/gather_automation_controller_billing_data.py index 080d2db..a5b1970 100644 --- a/metrics_utility/management/commands/gather_automation_controller_billing_data.py +++ b/metrics_utility/management/commands/gather_automation_controller_billing_data.py @@ -53,9 +53,12 @@ def handle(self, *args, **options): # Process since argument since = None - if opt_since.endswith('d'): + if opt_since and opt_since.endswith('d'): days_ago = int(opt_since[0:-1]) since = (datetime.datetime.now() - datetime.timedelta(days=days_ago-1)).replace(hour=0, minute=0, second=0, microsecond=0) + elif opt_since and opt_since.endswith('m'): + minutes_ago = int(opt_since[0:-1]) + since = (datetime.datetime.now() - datetime.timedelta(minutes=minutes_ago)) else: since = parser.parse(opt_since) if opt_since else None # Add default utc timezone @@ -64,19 +67,18 @@ def handle(self, *args, **options): # Process until argument until = None - if opt_until.endswith('d'): + if opt_until and opt_until.endswith('d'): days_ago = int(opt_until[0:-1]) until = (datetime.datetime.now() - datetime.timedelta(days=days_ago-1)).replace(hour=0, minute=0, second=0, microsecond=0) + elif opt_until and opt_until.endswith('m'): + minutes_ago = int(opt_until[0:-1]) + until = (datetime.datetime.now() - datetime.timedelta(minutes=minutes_ago)) else: until = parser.parse(opt_until) if opt_until else None # Add default utc 
timezone if until and until.tzinfo is None: until = until.replace(tzinfo=timezone.utc) - if since is None or until is None: - self.logger.error('Both --since and --until arguments must be passed') - return - if opt_ship and opt_dry_run: self.logger.error('Arguments --ship and --dry-run cannot be processed at the same time, set only one of these.') return diff --git a/setup.cfg b/setup.cfg index 3b73e85..ec4759d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ name = metrics_utility author = Red Hat author_email = info@ansible.com -version = 0.0.2 +version = 0.0.3 [options] packages = find: