Skip to content

Commit

Permalink
Pull request update/250113
Browse files Browse the repository at this point in the history
7ff0c49 OSN-555. Add notification for report import failure
6dc3da4 OSN-296. Fixed missing report_import_failed email on creating
employee + fixed migration tree
faaee24 OS-7903. Update events pagination, add polling
cc2154b OS-6140. Send report import constantly fail to org manager
  • Loading branch information
tm-hystax authored Jan 13, 2025
2 parents 0bc38e9 + 7ff0c49 commit a957646
Show file tree
Hide file tree
Showing 11 changed files with 409 additions and 161 deletions.
64 changes: 25 additions & 39 deletions diworker/diworker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import urllib3
from threading import Thread
from optscale_client.config_client.client import Client as ConfigClient
from datetime import datetime
from etcd import Lock as EtcdLock
from kombu import Exchange, Queue, Connection as QConnection
from kombu.pools import producers
Expand All @@ -14,15 +12,16 @@
from kombu.utils.debug import setup_logging
from pymongo import MongoClient
from urllib3.exceptions import InsecureRequestWarning
from optscale_client.rest_api_client.client_v2 import Client as RestClient
from clickhouse_driver import Client as ClickHouseClient

from optscale_client.config_client.client import Client as ConfigClient
from optscale_client.rest_api_client.client_v2 import Client as RestClient
from tools.optscale_time.optscale_time import startday, utcfromtimestamp

from diworker.diworker.importers.base import BaseReportImporter
from diworker.diworker.importers.factory import get_importer_class
from diworker.diworker.migrator import Migrator

from optscale_client.herald_client.client_v2 import Client as HeraldClient

ACTIVITIES_EXCHANGE_NAME = 'activities-tasks'
ALERT_THRESHOLD = 60 * 60 * 24
EXCHANGE_NAME = 'billing-reports'
Expand Down Expand Up @@ -83,12 +82,13 @@ def clickhouse_cl(self):
return self._clickhouse_cl

def publish_activities_task(self, organization_id, object_id, object_type,
action, routing_key):
action, routing_key, meta=None):
task = {
'organization_id': organization_id,
'object_id': object_id,
'object_type': object_type,
'action': action
'action': action,
'meta': meta
}
queue_conn = QConnection('amqp://{user}:{pass}@{host}:{port}'.format(
**self.config_cl.read_branch('/rabbit')))
Expand Down Expand Up @@ -144,11 +144,13 @@ def report_import(self, task):
'recalculate': is_recalculation}
importer = None
ca = None
previous_attempt_ts = 0
try:
_, ca = self.rest_cl.cloud_account_get(
importer_params.get('cloud_account_id'))
organization_id = ca.get('organization_id')
start_last_import_ts = ca.get('last_import_at', 0)
previous_attempt_ts = ca.get('last_import_attempt_at', 0)
cc_type = ca.get('type')
importer = get_importer_class(cc_type)(**importer_params)
importer.import_report()
Expand All @@ -171,51 +173,35 @@ def report_import(self, task):
if hasattr(exc, 'details'):
# pylint: disable=E1101
LOG.error('Mongo exception details: %s', exc.details)
reason = str(exc)
self.rest_cl.report_import_update(
self.report_import_id,
{'state': 'failed', 'state_reason': str(exc)}
{'state': 'failed', 'state_reason': reason}
)
now = int(time.time())
if not importer:
importer = BaseReportImporter(**importer_params)
importer.update_cloud_import_attempt(now, str(exc))
# TODO: OS-6259: temporary mute service email
# if ca:
# self.send_service_email(ca, now, str(exc))
importer.update_cloud_import_attempt(now, reason)
self.send_report_failed_email(ca, previous_attempt_ts, now)
raise

def send_service_email(self, cloud_account, now, reason):
def send_report_failed_email(self, cloud_account, previous_attempt_ts,
now):
last_import_at = cloud_account['last_import_at']
if not last_import_at:
last_import_at = cloud_account['created_at']
if now - last_import_at < ALERT_THRESHOLD:
return

_, organization = self.rest_cl.organization_get(
cloud_account['organization_id'])
recipient = self.config_cl.optscale_error_email_recipient()
if not recipient:
return
title = "Report import failed"
subject = '[%s] %s' % (self.config_cl.public_ip(), title)
template_params = {
'texts': {
'organization': {
'id': organization['id'],
'name': organization['name']},
'cloud_account': {
'id': cloud_account['id'],
'name': cloud_account['name'],
'type': cloud_account['type'],
'last_import_at': datetime.fromtimestamp(
last_import_at).strftime('%m/%d/%Y %H:%M:%S UTC')
},
'reason': reason
}}
HeraldClient(url=self.config_cl.herald_url(),
secret=self.config_cl.cluster_secret()).email_send(
[recipient], subject, template_params=template_params,
template_type="report_import_failed")
if last_import_at < previous_attempt_ts:
# previous import failed too
if startday(utcfromtimestamp(previous_attempt_ts)) == startday(
utcfromtimestamp(now)):
# email already sent today during previous report import fails
return
self.publish_activities_task(
cloud_account['organization_id'], cloud_account['id'],
'cloud_account', 'report_import_failed',
'organization.report_import.failed')

def process_task(self, body, message):
try:
Expand Down
47 changes: 45 additions & 2 deletions docker_images/herald_executor/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
routing_key='organization.recommendation.new_security_recommendation'),
binding(TASK_EXCHANGE,
routing_key='organization.recommendation.saving_spike'),
binding(TASK_EXCHANGE, routing_key='organization.report_import.passed'),
binding(TASK_EXCHANGE, routing_key='organization.report_import.#'),
binding(TASK_EXCHANGE, routing_key='insider.error.sslerror'),
binding(TASK_EXCHANGE, routing_key='arcee.system.#')
])
Expand All @@ -70,6 +70,7 @@ class HeraldTemplates(Enum):
RESOURCE_OWNER_VIOLATION_ALERT = 'resource_owner_violation_alert'
TAGGING_POLICY = 'organization_policy_tagging'
REPORT_IMPORT_PASSED = 'report_imports_passed_for_org'
REPORT_IMPORT_FAILED = 'report_import_failed'
INSIDER_SSLERROR = 'insider_prices_sslerror'
FIRST_TASK_CREATED = 'first_task_created'
FIRST_RUN_STARTED = 'first_run_started'
Expand Down Expand Up @@ -336,7 +337,8 @@ def _prepare_acquire_release_data(booking_id, action):
status_changed_info = {
'environment_status_changed': {
'changed_environment': {
'name': resource.get('name') or resource.get('cloud_resource_id'),
'name': resource.get('name') or resource.get(
'cloud_resource_id'),
'id': resource['id'],
'status': updated_value
}
Expand Down Expand Up @@ -783,6 +785,45 @@ def execute_first_run_started(self, run_id, run_name, profiling_token,
}
self._send_service_email(title, template_type, template_params)

def execute_report_import_failed(self, cloud_account_id, organization_id):
    """Email organization managers that a report import has failed.

    Fetches the organization and the cloud account through the REST
    client, builds the ``report_import_failed`` template parameters and
    sends one email per recipient. A recipient is an employee that has an
    ``optscale_manager`` assignment on the ``organization`` resource type
    and for whom ``is_email_enabled`` returns a truthy value for this
    template.

    :param cloud_account_id: id of the cloud account whose import failed
    :param organization_id: id of the organization to notify
    """
    # Function-scope import: only ``datetime`` is known to be imported at
    # module level from what this block uses.
    from datetime import timezone

    _, organization = self.rest_cl.organization_get(organization_id)
    _, cloud_account = self.rest_cl.cloud_account_get(cloud_account_id)
    # The title is what is sent as the email subject.
    title = "Report import failed"
    template_type = HeraldTemplates.REPORT_IMPORT_FAILED.value

    # The rendered value is labelled "UTC", so convert explicitly in UTC;
    # a bare fromtimestamp() would format in the host's local timezone.
    # NOTE(review): a falsy last_import_at (never imported) renders as the
    # epoch here -- confirm whether a created_at fallback is wanted.
    last_import_at = datetime.fromtimestamp(
        cloud_account['last_import_at'], tz=timezone.utc
    ).strftime('%m/%d/%Y %H:%M:%S UTC')
    template_params = {
        'texts': {
            'organization': {
                'id': organization['id'],
                'name': organization['name']},
            'cloud_account': {
                'id': cloud_account['id'],
                'name': cloud_account['name'],
                'type': cloud_account['type'],
                'last_import_at': last_import_at
            },
            'reason': cloud_account['last_import_attempt_error']
        }}

    _, employees = self.rest_cl.employee_list(organization_id, roles=True)
    # A set de-duplicates addresses shared by several employee records.
    employees_emails = set()
    for emp in employees['employees']:
        assignments = {
            (assign['assignment_resource_type'], assign['purpose'])
            for assign in emp['assignments']
        }
        # Check the role first so is_email_enabled is only called for
        # organization managers.
        if ('organization', 'optscale_manager') in assignments and (
                self.is_email_enabled(emp['id'], template_type)):
            employees_emails.add(emp['user_email'])

    # One message per recipient, each addressed to a single email.
    for email in employees_emails:
        self.herald_cl.email_send(
            [email], title,
            template_type=template_type,
            template_params=template_params)

def execute(self, task):
organization_id = task.get('organization_id')
action = task.get('action')
Expand All @@ -804,6 +845,7 @@ def execute(self, task):
'organization_constraint_violated': task_params,
'new_security_recommendation': [organization_id, meta],
'saving_spike': [object_id, meta],
'report_import_failed': task_params,
'report_import_passed': [object_id],
'insider_prices_sslerror': [],
'first_task_created': [object_id, object_name, profiling_token],
Expand All @@ -824,6 +866,7 @@ def execute(self, task):
self.execute_organization_constraint_violated,
'new_security_recommendation':
self.execute_new_security_recommendation,
'report_import_failed': self.execute_report_import_failed,
'saving_spike': self.execute_saving_spike,
'report_import_passed': self.execute_report_imports_passed_for_org,
'insider_prices_sslerror': self.execute_insider_prices,
Expand Down
1 change: 1 addition & 0 deletions ngui/server/graphql/schemas/keeper.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ input EventsRequestParams {
includeRead: Boolean = true
readOnGet: Boolean = true
descriptionLike: String
requestId: String
}

type Query {
Expand Down
2 changes: 1 addition & 1 deletion ngui/ui/src/components/ApolloProvider/ApolloProvider.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const ApolloClientProvider = ({ children }) => {

/* Just log network errors for now.
We rely on custom error codes that are returned in graphQLErrors.
It might be usefult to cache networkError errors to display alerts as well.
It might be useful to cache networkError errors to display alerts as well.
*/
if (networkError) {
console.error(`[Network error]: ${networkError}`);
Expand Down
97 changes: 52 additions & 45 deletions ngui/ui/src/components/Events/Events.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@ import { EN_FULL_FORMAT, formatUTC } from "utils/datetime";
import { SPACING_1, SPACING_2, SPACING_3 } from "utils/layouts";
import { getQueryParams, updateQueryParams, removeQueryParam } from "utils/network";

const actionBarDefinition = {
title: {
messageId: "events",
dataTestId: "lbl_events"
}
};

const Loader = () => (
<Box width="100%" textAlign="center" pt={2}>
<CircularProgress />
Expand Down Expand Up @@ -128,7 +121,23 @@ const EventIcon = ({ eventLevel }) =>
[EVENT_LEVEL.DEBUG]: <PestControlIcon fontSize="small" color="info" />
})[eventLevel];

const Events = ({ eventLevel, includeDebugEvents, descriptionLike, onScroll, applyFilter, events, isLoading = false }) => {
const Events = ({
eventLevel,
descriptionLike,
includeDebugEvents,
onScroll,
applyFilter,
events,
isLoading = false,
isFetchingMore = false
}) => {
const actionBarDefinition = {
title: {
messageId: "events",
dataTestId: "lbl_events"
}
};

const [expanded, setExpanded] = useState("");
const queryParams = getQueryParams();

Expand Down Expand Up @@ -249,27 +258,43 @@ const Events = ({ eventLevel, includeDebugEvents, descriptionLike, onScroll, app
const renderEventList = () => {
const noEvents = isEmpty(events);

if (isLoading) {
return <Loader />;
}

if (noEvents) {
return isLoading ? (
<Loader />
) : (
<Typography>
<FormattedMessage id="noEvents" />
</Typography>
);
return <FormattedMessage id="noEvents" />;
}

return (
<Box>
<Stack spacing={SPACING_3}>
{Object.entries(getEventsGroupedByTime(events)).map(([groupKey, groupData], index) => (
<Box key={groupKey}>
<Typography>{groupKey}</Typography>
{renderAccordion(groupData, index)}
</Box>
))}
</Stack>
{isLoading ? <Loader /> : null}
<Box
onScroll={onScroll}
display="flex"
flexDirection="column"
flexGrow={1}
flexBasis="0px"
overflow="auto"
/**
* Set an approximate maximum height for the events section to ensure it remains scrollable on large screens.
* The maximum height should be determined based on the height of the container when all events belong to a single date.
* In this scenario, the container's height will be close to its minimum possible value.
*
* EVENTS_LIMIT represents the maximum number of events that can be fetched in a single request.
* Each event is assumed to occupy approximately 25 pixels in height.
*/
maxHeight={`${EVENTS_LIMIT * 25}px`}
>
<Box>
<Stack spacing={SPACING_3}>
{Object.entries(getEventsGroupedByTime(events)).map(([groupKey, groupData], index) => (
<Box key={groupKey}>
<Typography>{groupKey}</Typography>
{renderAccordion(groupData, index)}
</Box>
))}
</Stack>
{isFetchingMore ? <Loader /> : null}
</Box>
</Box>
);
};
Expand All @@ -281,7 +306,7 @@ const Events = ({ eventLevel, includeDebugEvents, descriptionLike, onScroll, app
<Stack spacing={SPACING_1} height="100%">
<Box display="flex" flexWrap="wrap" gap={SPACING_2}>
<Box display="flex" gap={2}>
<EventLevelSelector eventLevel={eventLevel} onApply={applyFilter} showDebugEvent={includeDebugEvents} />
<EventLevelSelector eventLevel={eventLevel} onApply={applyFilter} />
<FormControlLabel
control={
<Checkbox
Expand Down Expand Up @@ -337,25 +362,7 @@ const Events = ({ eventLevel, includeDebugEvents, descriptionLike, onScroll, app
</Box>
</Box>
</Box>
<Box
onScroll={onScroll}
display="flex"
flexDirection="column"
flexGrow={1}
flexBasis="0px"
overflow="auto"
/**
* Set an approximate maximum height for the events section to ensure it remains scrollable on large screens.
* The maximum height should be determined based on the height of the container when all events belong to a single date.
* In this scenario, the container's height will be close to its minimum possible value.
*
* EVENTS_LIMIT represents the maximum number of events that can be fetched in a single request.
* Each event is assumed to occupy approximately 25 pixels in height.
*/
maxHeight={`${EVENTS_LIMIT * 25}px`}
>
{renderEventList()}
</Box>
{renderEventList()}
</Stack>
</PageContentWrapper>
</>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ const EMAIL_TEMPLATES = {
report_imports_passed_for_org: {
title: "emailTemplates.systemNotifications.report_imports_passed_for_org.title",
description: "emailTemplates.systemNotifications.report_imports_passed_for_org.description"
},
report_import_failed: {
title: "emailTemplates.systemNotifications.report_import_failed.title",
description: "emailTemplates.systemNotifications.report_import_failed.description"
}
},
accountManagement: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export type ApiEmployeeEmail = {
| "new_security_recommendation"
| "environment_changes"
| "report_imports_passed_for_org"
| "report_import_failed"
| "invite";
enabled: boolean;
employee_id: string;
Expand Down
Loading

0 comments on commit a957646

Please sign in to comment.