Skip to content

Commit

Permalink
enh(bam): sometimes kpi creation may break BA's consistency
Browse files Browse the repository at this point in the history
REFS: MON-34895
  • Loading branch information
bouda1 authored Mar 18, 2024
1 parent cb7d8af commit 303887d
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 4 deletions.
2 changes: 1 addition & 1 deletion broker/bam/src/ba.cc
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ void ba::update_from(computable* child, io::stream* visitor) {
kpi_child->get_id());
bool changed = _apply_changes(kpi_child, new_hard_impact, new_soft_impact,
kpi_in_downtime);
SPDLOG_LOGGER_TRACE(log_v2::bam(), "BA has changed: {}", changed);
SPDLOG_LOGGER_TRACE(log_v2::bam(), "BA {} has changed: {}", _id, changed);

// Check for inherited downtimes.
_compute_inherited_downtime(visitor);
Expand Down
8 changes: 5 additions & 3 deletions broker/bam/src/configuration/applier/kpi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ void applier::kpi::apply(bam::configuration::state::kpis const& my_kpis,
bam::configuration::state::kpis to_create(my_kpis);

// Iterate through configuration.
for (bam::configuration::state::kpis::iterator it(to_create.begin()),
end(to_create.end());
for (bam::configuration::state::kpis::iterator it = to_create.begin(),
end = to_create.end();
it != end;) {
std::map<uint32_t, applied>::iterator cfg_it(to_delete.find(it->first));
std::map<uint32_t, applied>::iterator cfg_it = to_delete.find(it->first);
// Found = modify (or not).
if (cfg_it != to_delete.end()) {
// Configuration mismatch, modify object
Expand Down Expand Up @@ -354,6 +354,8 @@ void applier::kpi::_resolve_kpi(configuration::kpi const& cfg,

my_ba->add_impact(kpi);
kpi->add_parent(my_ba);
/* Force the propagation so that the whole BA stays coherent. */
my_ba->notify_parents_of_change(nullptr);
}

/**
Expand Down
22 changes: 22 additions & 0 deletions init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# A little script to source before compiling.
# It just sets VCPKG_ROOT and PATH so that make/ninja won't fail.
#
# Usage:
#   . init.sh
#
# Status codes:
#   1 (exit)   - the script has been executed instead of being sourced.
#   2 (return) - PATH already contains "vcpkg", nothing is changed.

# To be sure the script is sourced: 'return' only succeeds in a sourced
# script. The probe is run in a subshell so it has no effect on this shell.
(return 0 2>/dev/null) && sourced=1 || sourced=0

if (( sourced == 0 )) ; then
  echo "Please execute this script with:"
  echo ". init.sh"
  # Not sourced: we are in our own process, so 'exit' is the right call.
  exit 1
fi

# To be sure the script has not already been launched.
if [[ "$PATH" =~ "vcpkg" ]] ; then
  echo "Already initialized"
  # We are running inside the caller's shell here: 'exit' would close it,
  # so 'return' is used to only stop the sourced script.
  return 2
fi

# The main purpose.
export VCPKG_ROOT="$PWD/vcpkg"
export PATH="$VCPKG_ROOT:$PATH"
141 changes: 141 additions & 0 deletions tests/bam/bam_pb.robot
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,147 @@ BA_RATIO_PERCENT_BA_4_SERVICE
[Teardown] Run Keywords Ctn Stop engine AND Ctn Kindly Stop Broker


BA_CHANGED
    [Documentation]    A BA of type worst is configured with one service kpi.
    ...    Then it is modified so that the service kpi is replaced
    ...    by a boolean rule kpi. When cbd is reloaded, the BA is
    ...    well updated.
    [Tags]    MON-34895
    Ctn Bam Init

    # Initial configuration: the BA "test" with a single service kpi.
    @{svc}    Set Variable    ${{ [("host_16", "service_302")] }}
    ${ba}    Ctn Create Ba With Services    test    worst    ${svc}

    Ctn Start Broker
    ${start}    Get Current Date
    Ctn Start Engine
    # Wait for Engine to start checking external commands.
    ${content}    Create List    check_for_external_commands()
    ${result}    Ctn Find In Log With Timeout    ${engineLog0}    ${start}    ${content}    60
    Should Be True    ${result}    A message telling check_for_external_commands() should be available.

    # The service is forced to OK => the BA is expected to be OK.
    Ctn Process Service Result Hard
    ...    host_16
    ...    service_302
    ...    0
    ...    output OK for service 302

    ${result}    Ctn Check Ba Status With Timeout    test    0    30
    Ctn Dump Ba On Error    ${result}    ${ba[0]}
    Should Be True    ${result}    The BA test is not OK as expected

    # First change: the service kpi is replaced by a boolean kpi.
    Ctn Remove Service Kpi    ${ba[0]}    host_16    service_302
    Ctn Add Boolean Kpi
    ...    ${ba[0]}
    ...    {host_16 service_302} {IS} {OK}
    ...    False
    ...    100

    # After the reload, the dumped BA must contain the boolean expression.
    Ctn Reload Broker
    Remove File    /tmp/ba.dot
    Ctn Broker Get Ba    51001    ${ba[0]}    /tmp/ba.dot
    Wait Until Created    /tmp/ba.dot
    ${result}    Grep File    /tmp/ba.dot    Boolean exp
    Should Not Be Empty    ${result}

    # Second change: another boolean kpi, based on service_303, is added.
    Ctn Add Boolean Kpi
    ...    ${ba[0]}
    ...    {host_16 service_303} {IS} {WARNING}
    ...    False
    ...    100

    # After the reload, the dumped BA must reference the service (16, 303).
    Ctn Reload Broker
    Remove File    /tmp/ba.dot
    Ctn Broker Get Ba    51001    ${ba[0]}    /tmp/ba.dot
    Wait Until Created    /tmp/ba.dot
    ${result}    Grep File    /tmp/ba.dot    BOOL Service (16, 303)
    Should Not Be Empty    ${result}
    [Teardown]    Run Keywords    Ctn Stop engine    AND    Ctn Kindly Stop Broker


BA_IMPACT_IMPACT
    [Documentation]    A BA of type impact is defined with two BAs of type impact
    ...    as children. The first child has an impact of 90 and the
    ...    second one of 10. When they are impacting both, the
    ...    parent should be critical. When they are not impacting,
    ...    the parent should be ok.
    [Tags]    MON-34895
    Ctn Bam Init

    # The parent BA and its two children, each child owning one service kpi.
    ${parent_ba}    Ctn Create Ba    parent    impact    20    99
    ${child1_ba}    Ctn Create Ba    child1    impact    20    99
    Ctn Add Service Kpi    host_16    service_302    ${child1_ba[0]}    100    2    3
    ${child2_ba}    Ctn Create Ba    child2    impact    20    99
    Ctn Add Service Kpi    host_16    service_303    ${child2_ba[0]}    100    2    3

    # The children are attached to the parent with impacts of 90 and 10.
    Ctn Add Ba Kpi    ${child1_ba[0]}    ${parent_ba[0]}    90    2    3
    Ctn Add Ba Kpi    ${child2_ba[0]}    ${parent_ba[0]}    10    2    3

    Ctn Start Broker
    ${start}    Get Current Date
    Ctn Start Engine
    # Let's wait for the external command check start
    ${content}    Create List    check_for_external_commands()
    ${result}    Ctn Find In Log With Timeout    ${engineLog0}    ${start}    ${content}    60
    Should Be True    ${result}    A message telling check_for_external_commands() should be available.

    FOR    ${state}    ${value}    IN
    ...    OK    0
    ...    CRITICAL    2
    ...    OK    0
    ...    CRITICAL    2
        # Both services ${state} => The BA parent is ${state}
        Ctn Process Service Result Hard
        ...    host_16
        ...    service_302
        ...    ${value}
        ...    output ${state} for service 302

        # The output names the service that really receives the result.
        Ctn Process Service Result Hard
        ...    host_16
        ...    service_303
        ...    ${value}
        ...    output ${state} for service 303

        ${result}    Ctn Check Service Status With Timeout    host_16    service_302    ${value}    60    HARD
        Should Be True    ${result}    The service (host_16,service_302) is not ${state} as expected
        ${result}    Ctn Check Service Status With Timeout    host_16    service_303    ${value}    60    HARD
        Should Be True    ${result}    The service (host_16,service_303) is not ${state} as expected

        ${result}    Ctn Check Ba Status With Timeout    child1    ${value}    30
        Ctn Dump Ba On Error    ${result}    ${child1_ba[0]}
        Should Be True    ${result}    The BA child1 is not ${state} as expected

        ${result}    Ctn Check Ba Status With Timeout    child2    ${value}    30
        Ctn Dump Ba On Error    ${result}    ${child2_ba[0]}
        Should Be True    ${result}    The BA child2 is not ${state} as expected

        ${result}    Ctn Check Ba Status With Timeout    parent    ${value}    30
        Ctn Dump Ba On Error    ${result}    ${parent_ba[0]}
        Should Be True    ${result}    The BA parent is not ${state} as expected

        # Dump the parent BA before the reload...
        Remove Files    /tmp/parent1.dot    /tmp/parent2.dot
        Ctn Broker Get Ba    51001    ${parent_ba[0]}    /tmp/parent1.dot
        Wait Until Created    /tmp/parent1.dot

        ${start}    Get Current Date
        Ctn Reload Broker
        ${content}    Create List    Inherited downtimes and BA states restored
        ${result}    Ctn Find In Log With Timeout    ${centralLog}    ${start}    ${content}    60
        Should Be True    ${result}    It seems that no cache has been restored into BAM.

        # ...and after it: both dumps are expected to be equivalent.
        Ctn Broker Get Ba    51001    ${parent_ba[0]}    /tmp/parent2.dot
        Wait Until Created    /tmp/parent2.dot

        ${result}    Ctn Compare Dot Files    /tmp/parent1.dot    /tmp/parent2.dot
        Should Be True    ${result}    The BA changed during Broker reload.
    END

    [Teardown]    Run Keywords    Ctn Stop engine    AND    Ctn Kindly Stop Broker


*** Keywords ***
Ctn BAM Setup
Ctn Stop Processes
Expand Down
13 changes: 13 additions & 0 deletions tests/resources/Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,19 @@ def ctn_add_service_kpi(host: str, serv: str, id_ba: int, critical_impact: int,
host, serv, id_ba, critical_impact, warning_impact, unknown_impact)


def ctn_remove_service_kpi(id_ba: int, host: str, svc: str):
    """
    Remove from a BA the service kpi identified by its host name and its
    service description.

    The work is delegated to dbconf.ctn_remove_service_kpi().

    Args:
        id_ba: The ID of the BA owning the kpi.
        host: The name of the host the service belongs to.
        svc: The description of the service.
    """
    # dbconf is only read here, so no 'global' declaration is needed.
    dbconf.ctn_remove_service_kpi(id_ba, host, svc)


def ctn_get_command_id(service: int):
"""
Get the command ID of the service with the given ID.
Expand Down
13 changes: 13 additions & 0 deletions tests/resources/db_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,19 @@ def ctn_add_service_kpi(self, host: str, serv: str, id_ba: int, critical_impact:

connection.commit()

def ctn_remove_service_kpi(self, id_ba: int, host: str, svc: str):
    """
    Delete from the mod_bam_kpi table the service kpi attached to a BA.

    The host and service IDs are looked up in self.host and self.service
    from the given host name and service description.

    Args:
        id_ba: The ID of the BA the kpi is attached to.
        host: The host name, used as key in self.host.
        svc: The service description, used as key in self.service.
    """
    connection = pymysql.connect(host=DB_HOST,
                                 user=DB_USER,
                                 password=DB_PASS,
                                 database=DB_NAME_CONF,
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    with connection:
        with connection.cursor() as cursor:
            # The values are given as query parameters so that the driver
            # escapes them, instead of being interpolated in the SQL text.
            cursor.execute(
                "DELETE FROM mod_bam_kpi WHERE host_id=%s AND service_id=%s AND id_ba=%s",
                (self.host[host], self.service[svc], id_ba))
        connection.commit()

def ctn_add_boolean_kpi(self, id_ba: int, expression: str, impact_if: bool, critical_impact: int):
connection = pymysql.connect(host=DB_HOST,
user=DB_USER,
Expand Down

2 comments on commit 303887d

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Robot Results

✅ Passed ❌ Failed ⏭️ Skipped Total Pass % ⏱️ Duration
6 1 0 7 85.71 0s

Failed Tests

Name Message ⏱️ Duration Suite
BENCH_10000STATUS AttributeError: 'NoneType' object has no attribute 'query_read_bytes' 0.000 s Bench

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Robot Results

✅ Passed ❌ Failed ⏭️ Skipped Total Pass % ⏱️ Duration
6 1 0 7 85.71 0s

Failed Tests

Name Message ⏱️ Duration Suite
NetworkDbFail5 RRD Broker not correctly stopped: -11 != 0 0.000 s networkFailure

Please sign in to comment.