Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle APML_ALERT assertion for SBRMI::[AlertSts] #103

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion inc/cper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
* CPER section descriptor revision, used in revision field in struct
* cper_section_descriptor
*/
#define CPER_MINOR_REV (0x000C)
#define CPER_MINOR_REV (0x000D)

#define ADDC_GEN_NUMBER_1 (0x01)
#define ADDC_GEN_NUMBER_2 (0x02)
Expand Down
62 changes: 42 additions & 20 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,29 +502,49 @@ void clearSbrmiAlertMask()

oob_status_t ret;

sd_journal_print(LOG_ERR,
"Clear Alert Mask bit of SBRMI Control register \n");
uint8_t buffer;
for (uint8_t socNum = 0; socNum < num_of_proc; socNum++)
{
sd_journal_print(
LOG_INFO,
"Clear Alert Mask bit of SBRMI Control register for socket %d\n",
socNum);

ret = read_register(p0_info, SBRMI_CONTROL_REGISTER, &buffer);
uint8_t buffer;

if (ret == OOB_SUCCESS)
{
buffer = buffer & 0xFE;
write_register(p0_info, SBRMI_CONTROL_REGISTER,
static_cast<uint32_t>(buffer));
}
ret = read_register(socNum, SBRMI_CONTROL_REGISTER, &buffer);

if (num_of_proc == TWO_SOCKET)
{
buffer = 0;
ret = read_register(p1_info, SBRMI_CONTROL_REGISTER, &buffer);
if (ret == OOB_SUCCESS)
{
buffer = buffer & 0xFE;
write_register(p1_info, SBRMI_CONTROL_REGISTER,
write_register(socNum, SBRMI_CONTROL_REGISTER,
static_cast<uint32_t>(buffer));
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No actions required in case of read_register failure?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case of a write failure, it will be logged in the write_register() API.
So we don't need to log it here. Also, we continue the ADDC functionality even if the write_register() call fails.

for (uint8_t i = 0; i < sizeof(alert_status); i++)
{
ret = read_register(socNum, alert_status[i], &buffer);

if (ret == OOB_SUCCESS)
{
if ((buffer & 0x0F) != 0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

magic number: 0x0F

{
sd_journal_print(
LOG_INFO,
"Socket%d: MCE Stat of SBRMIx[0x%x] is set to 0x%x\n",
socNum, alert_status[i], buffer);

buffer = buffer & 0xFF;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

magic number 0xFF

write_register(socNum, alert_status[i],
static_cast<uint32_t>(buffer));
}
}
else
{
sd_journal_print(LOG_ERR,
"Socket%d: Failed to read SBRMIx[0x%x]",
socNum, alert_status[i]);
}
}
}
}

Expand Down Expand Up @@ -625,8 +645,6 @@ void performPlatformInitialization()
oob_status_t ret = OOB_MAILBOX_CMD_UNKNOWN;
struct processor_info platInfo[INDEX_1];

std::cout << "perform performPlatformInitialization" << std::endl;

if (platformInitialized == false)
{
while (ret != OOB_SUCCESS)
Expand All @@ -641,7 +659,6 @@ void performPlatformInitialization()
}
sleep(INDEX_1);
}
std::cout << "platformInitialized " << std::endl;

if (ret == OOB_SUCCESS)
{
Expand All @@ -655,7 +672,6 @@ void performPlatformInitialization()
}
else if (platInfo->family == TURIN_FAMILY_ID)
{
std::cout << "Turin platform " << std::endl;
currentHostStateMonitor();

clearSbrmiAlertMask();
Expand All @@ -668,6 +684,12 @@ void performPlatformInitialization()

runtimeErrPollingSupported = true;
}
else
{
throw std::runtime_error(
"This program is not supported for the platform 0x%x\n" +
platInfo->family);
}
platformInitialized = true;
apmlInitialized = true;
}
Expand Down Expand Up @@ -703,7 +725,7 @@ void apmlActiveMonitor()
{
ret = get_bmc_ras_oob_config(INDEX_0, &d_out);

if(ret == OOB_MAILBOX_CMD_UNKNOWN)
if (ret == OOB_MAILBOX_CMD_UNKNOWN)
{
ret = esmi_get_processor_info(INDEX_0, plat_info);
}
Expand Down