Skip to content

Commit

Permalink
[phoenix] Fixed logging
Browse files Browse the repository at this point in the history
  • Loading branch information
bzizou committed Dec 12, 2023
1 parent f7a637a commit cd02e92
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions oar/tools/oar_phoenix.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
PHOENIX_MAX_REBOOTS = 20

# Timeout (s) for unix commands
- PHOENIX_CMD_TIMEOUT = 15
+ PHOENIX_CMD_TIMEOUT = 60

# Properties of the broken nodes (SQL where clause)
PHOENIX_BROKEN_NODES = "state='Suspected' and network_address NOT IN (SELECT distinct(network_address) FROM resources where resource_id IN (SELECT resource_id FROM assigned_resources WHERE assigned_resource_index = 'CURRENT')) and network_address not like 'luke%' and network_address != 'dahu33' and network_address not like 'bigfoot%'"
Expand Down Expand Up @@ -66,6 +66,7 @@ def send_cmd(cmd):
else:
logfile.write(f"{current_time} - Command executed, no output\n")
except subprocess.TimeoutExpired:
+ logfile.write(f"{current_time} - Command timed out!\n")
process.kill()
return "Timed out!"
except Exception as e:
Expand Down Expand Up @@ -125,20 +126,22 @@ def get_nodes_to_hard_reboot(db, broken_nodes):

# Soft reboot nodes
def soft_reboot_nodes(db, nodes):
- with open(f"{PHOENIX_LOGDIR}/oar_phoenix.log", "a") as logfile:
+ logfile=open(f"{PHOENIX_LOGDIR}/oar_phoenix.log", "a")
current_time = time.strftime("%Y-%m-%d %H:%M:%S")
for node in nodes:
logfile.write(f"{current_time} - Soft rebooting the broken node {node}\n")
+ logfile.close()
cmd = PHOENIX_SOFT_REBOOTCMD.replace("{NODENAME}", node)
db[node] = {'soft_reboot': time.time()}
send_cmd(cmd)

# Hard reboot nodes
def hard_reboot_nodes(db, nodes):
- with open(f"{PHOENIX_LOGDIR}/oar_phoenix.log", "a") as logfile:
+ logfile=open(f"{PHOENIX_LOGDIR}/oar_phoenix.log", "a")
current_time = time.strftime("%Y-%m-%d %H:%M:%S")
for node in nodes:
logfile.write(f"{current_time} - Hard rebooting the broken node {node}\n")
+ logfile.close()
cmd = PHOENIX_HARD_REBOOTCMD.replace("{NODENAME}", node)
del db[node]
db[node] = {'hard_reboot': time.time()}
Expand Down

0 comments on commit cd02e92

Please sign in to comment.