Skip to content

Commit

Permalink
Mon 147936 native windows drivesize (#1866) (#1898)
Browse files Browse the repository at this point in the history
* implement abstract test

* implement windows test

* fix agent test memory corruption,  add drive_size ut

* pass test variables in a json

* add robot test

* fix tests

* fix agent installer test

* agent accepts empty filters
  • Loading branch information
jean-christophe81 authored Nov 27, 2024
1 parent dfcbbed commit 9326ccd
Show file tree
Hide file tree
Showing 31 changed files with 2,087 additions and 131 deletions.
19 changes: 13 additions & 6 deletions .github/scripts/agent_installer_test.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,12 @@ function test_args_to_registry {
exit 1
}

#give Windows time to flush the registry
Start-Sleep -Seconds 2
# Poll for the agent registry key instead of sleeping a fixed time:
# up to 10 attempts, one second apart, stopping as soon as
# Get-ItemProperty returns something for the key.
for (($i = 0); $i -lt 10; $i++) {
Start-Sleep -Seconds 1
if (Get-ItemProperty -Path HKLM:\Software\Centreon\CentreonMonitoringAgent) {
# key is readable => the values can now be compared
break
}
}

foreach ($value_name in $expected_registry_values.Keys) {
$expected_value = $($expected_registry_values[$value_name])
Expand Down Expand Up @@ -95,12 +99,15 @@ if ($process_info.ExitCode -ne 0) {
exit 1
}

Start-Sleep -Seconds 5

Get-Process | Select-Object -Property ProcessName | Select-String centagent
# Wait for centagent to disappear from the process list:
# up to 10 checks, one second apart. $info keeps the result of the last
# check, so it is non-empty only if a matching process was still listed
# on the final attempt.
for (($i = 0); $i -lt 10; $i++) {
Start-Sleep -Seconds 1
$info = Get-Process | Select-Object -Property ProcessName | Select-String centagent
if (! $info) {
# no process name matches "centagent" any more
break
}
}

$info = Get-Process | Select-Object -Property ProcessName | Select-String centagent
#$info = Get-Process centagent 2>$null
if ($info) {
Write-Host "centagent.exe running"
exit 1
Expand Down
23 changes: 22 additions & 1 deletion .github/scripts/agent_robot_test.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,28 @@ Set-ItemProperty -Path HKLM:\SOFTWARE\Centreon\CentreonMonitoringAgent -Name lo

Start-Process -FilePath build_windows\agent\Release\centagent.exe -ArgumentList "--standalone" -RedirectStandardOutput reports\encrypted_reversed_centagent_stdout.log -RedirectStandardError reports\encrypted_reversed_centagent_stderr.log

wsl cd $wsl_path `&`& .github/scripts/wsl-collect-test-robot.sh broker-engine/cma.robot $my_host_name $my_ip $pwsh_path ${current_dir}.replace('\','/')
# Last boot time of this machine, converted step by step to a Unix timestamp.
# Note: despite its name, $uptime holds the boot date, not a duration.
$uptime = (Get-WmiObject -Class Win32_OperatingSystem).LastBootUpTime #DMTF datetime string
$d_uptime = [Management.ManagementDateTimeConverter]::ToDateTime($uptime) #DateTime object
$ts_uptime = ([DateTimeOffset]$d_uptime).ToUnixTimeSeconds() #seconds since the Unix epoch

# All values needed by the WSL side are gathered in one hashtable so that
# they can be passed as a single JSON argument.
$test_param = @{
'host'= $my_host_name
'ip'= $my_ip
'wsl_path'= $wsl_path
'pwsh_path'= $pwsh_path
'drive' = @()
'current_dir' = $current_dir.replace('\','/')
'uptime' = $ts_uptime
}

# Append one entry (Name, Used, Free) per file system drive to the 'drive' array.
Get-PSDrive -PSProvider FileSystem | Select Name, Used, Free | ForEach-Object -Process {$test_param.drive += $_}

# -Compress produces single-line JSON without indentation.
$json_test_param = $test_param | ConvertTo-Json -Compress

Write-Host "json_test_param" $json_test_param
# Wrap the JSON in single quotes before handing it to wsl.
# NOTE(review): presumably so the WSL shell receives it as one argument; this
# would break if a value contained a single quote - confirm.
$quoted_json_test_param = "'" + $json_test_param + "'"

# Run the robot test from the project directory inside WSL; the script
# receives the robot file as first argument and the JSON as second argument.
wsl cd $wsl_path `&`& .github/scripts/wsl-collect-test-robot.sh broker-engine/cma.robot $quoted_json_test_param

#something wrong in robot test => exit 1 => failure
if (Test-Path -Path 'reports\windows-cma-failed' -PathType Container) {
Expand Down
12 changes: 7 additions & 5 deletions .github/scripts/wsl-collect-test-robot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ set -x
test_file=$1

export RUN_ENV=WSL
export HOST_NAME=$2
export USED_ADDRESS=$3
export PWSH_PATH=$4
export WINDOWS_PROJECT_PATH=$5
# All test parameters arrive as one JSON document in $2; the individual
# values used by this script are extracted from it with jq.
export JSON_TEST_PARAMS="$2"

#######################################
# Print the value of one top-level key of JSON_TEST_PARAMS.
# The JSON is fed to jq through a quoted printf, so the brackets of JSON
# arrays, wildcards and repeated spaces in values are neither glob-expanded
# nor collapsed by word splitting.
# Globals:   JSON_TEST_PARAMS (read)
# Arguments: $1 - key name
# Outputs:   raw value on stdout ("null" when the key is absent)
#######################################
json_test_param() {
  printf '%s' "$JSON_TEST_PARAMS" | jq -r --arg key "$1" '.[$key]'
}

# Assign first, export afterwards: "export VAR=$(cmd)" would hide a jq failure.
USED_ADDRESS=$(json_test_param ip)
HOST_NAME=$(json_test_param host)
PWSH_PATH=$(json_test_param pwsh_path)
WINDOWS_PROJECT_PATH=$(json_test_param current_dir)
export USED_ADDRESS HOST_NAME PWSH_PATH WINDOWS_PROJECT_PATH



#in order to connect to windows we need to use the windows ip
Expand All @@ -17,7 +19,7 @@ echo "${USED_ADDRESS} ${HOST_NAME}" >> /etc/hosts
echo "##### /etc/hosts: ######"
cat /etc/hosts

echo "##### Starting tests #####"
echo "##### Starting tests ##### with params: $JSON_TEST_PARAMS"
cd tests
./init-proto.sh

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/windows-agent-robot-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ jobs:
python3
python3-pip
rrdtool
jq

- name: IP info
run: |
Expand Down
2 changes: 2 additions & 0 deletions agent/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ set( SRC_COMMON
${SRC_DIR}/bireactor.cc
${SRC_DIR}/check.cc
${SRC_DIR}/check_exec.cc
${SRC_DIR}/drive_size.cc
${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.cc
${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.pb.cc
${SRC_DIR}/opentelemetry/proto/metrics/v1/metrics.pb.cc
Expand All @@ -126,6 +127,7 @@ set( SRC_COMMON
set( SRC_WINDOWS
${SRC_DIR}/config_win.cc
${NATIVE_SRC}/check_uptime.cc
${NATIVE_SRC}/check_drive_size.cc
)

set( SRC_LINUX
Expand Down
11 changes: 10 additions & 1 deletion agent/doc/agent-doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,13 @@ metrics aren't the same as linux version. We collect user, idle, kernel , interr
There are two methods. The first one uses the internal Microsoft function NtQuerySystemInformation. Microsoft warns that its signature or data format may change at any moment, but it has been stable for many years. One subtlety: idle time is included in kernel time, so we subtract the former from the latter. DPC time is already included in interrupt time, so we don't add it when calculating the total time.
The second one relies on performance data counters (PDH API). It gives us percentages, although their sum is not quite 100%. That's why the default method is the first one.
The choice between the two methods is done by 'use-nt-query-system-information' boolean parameter.
The choice between the two methods is done by 'use-nt-query-system-information' boolean parameter.
### check_drive_size
We have to get the free space on the server's drives. For network drives, this call can block if the network fails. Unfortunately, there is no asynchronous API to do that, so a dedicated thread (drive_size_thread) computes these statistics. In order to be OS-independent and testable, drive_size_thread relies on a functor that does the job: drive_size_thread::os_fs_stats. This functor is initialized in the main function. drive_size_thread is stopped at the end of the main function.
It works as follows:
* check_drive_size posts a query to the drive_size_thread queue
* drive_size_thread calls os_fs_stats
* drive_size_thread posts the result to the io_context
* io_context calls check_drive_size::_completion_handler
Loading

0 comments on commit 9326ccd

Please sign in to comment.