-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
Copy pathfailures-config.yaml
47 lines (46 loc) · 1.53 KB
/
failures-config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Metric definition. `metric` names the metric and `description` summarises it;
# the `query` and `jqfilter` keys further down hold the SQL and the jq program.
# NOTE(review): presumably the jq program is applied to the query's JSON result
# rows (it reads the query's output column names) - confirm against the runner.
metric: failures
description: This query finds jobs that have been failing continuously for a long time.
# BigQuery (standard SQL) query: one row per job that started at least one
# build in the last 7 days.
#
# Output columns, in order:
#   job            job name
#   latest_pass    latest `started` value among the job's rows with
#                  result = 'SUCCESS'; null when the job has no such row
#   weekly_builds  number of rows for the job started in the last 7 days
#   first_run      date of the job's earliest `started` value (all history)
#   latest_run     date of the job's latest `started` value (all history)
#   broken_days    days from the last passing date (or from first_run when the
#                  job never passed) up to the current date
#
# Rows are sorted with the longest-broken jobs first.
query: |
  #standardSQL
  /* Find jobs that have not passed in a long time. */
  with
    /* Jobs that ran this week, and how many builds each one started. */
    recent_jobs as (
      select
        job,
        count(*) as weekly_builds
      from `kubernetes-public.k8s_infra_kettle.all`
      where started > timestamp_sub(current_timestamp(), interval 7 day)
      group by job
    ),
    /* Oldest and newest run of every job, across all history. */
    run_bounds as (
      select
        job,
        date(min(started)) as first_run,
        date(max(started)) as latest_run
      from `kubernetes-public.k8s_infra_kettle.all`
      group by job
    ),
    /* Most recent time each job passed (may not be this week). */
    last_passes as (
      select
        job,
        max(started) as latest_pass
      from `kubernetes-public.k8s_infra_kettle.all`
      where result = 'SUCCESS'
      group by job
    )
  select
    recent_jobs.job,
    last_passes.latest_pass,    /* how recently did this job pass */
    recent_jobs.weekly_builds,  /* how many times a week does it run */
    run_bounds.first_run,       /* when is the first time it ran */
    run_bounds.latest_run,      /* when is the most recent run */
    /* A job that has never passed counts as broken since its first run. */
    date_diff(
      current_date(),
      coalesce(date(last_passes.latest_pass), run_bounds.first_run),
      day
    ) as broken_days
  from recent_jobs
  left join run_bounds
    on recent_jobs.job = run_bounds.job
  left join last_passes
    on recent_jobs.job = last_passes.job
  /* The final key (job) is unique per row, so the ordering is fully determined. */
  order by
    broken_days desc,
    latest_pass,
    first_run,
    weekly_builds desc,
    recent_jobs.job
# jq program over an array of row objects. It keeps rows whose `latest_pass`
# is empty/null or whose `broken_days` exceeds 30, and merges them into a
# single object of the form {"<job>": {"failing_days": <number>}, ...}.
# `broken_days` is converted with `tonumber` before it is compared or emitted.
jqfilter: |
  [ .[]
    | (.broken_days | tonumber) as $days
    | select((.latest_pass | length) == 0 or $days > 30)
    | {(.job): {failing_days: $days}}
  ] | add