-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoolbox.py
223 lines (187 loc) · 7.08 KB
/
toolbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import json
import os
import shutil
import time
def delete_all_resources():
    """Delete the J2K workload resources from the cluster.

    Reads the target namespace from ``results-hub.namespace`` in
    ``./J2K_CONFIG.json`` (relative to the current working directory), loads
    the local kubeconfig, and then issues one delete-collection request per
    resource kind, in this order: Services, Jobs, Pods, StatefulSets and
    PersistentVolumeClaims in that namespace, followed by PersistentVolumes.

    Every deletion is best-effort: an ``ApiException`` is reported on stdout
    and the remaining kinds are still attempted.

    WARNING: the PersistentVolume request is not namespaced and is sent
    without any selector, so it targets every PV in the cluster.
    """
    with open("./J2K_CONFIG.json", 'r') as file:
        j2k_config = json.load(file)
    namespace = j2k_config['results-hub']['namespace']

    config.load_kube_config()

    v1 = client.CoreV1Api()
    batch_v1 = client.BatchV1Api()
    apps_v1 = client.AppsV1Api()

    in_namespace = {"namespace": namespace}
    # (label for the failure message, delete call, its kwargs, success message).
    # The order is the order in which the requests are sent.
    steps = (
        ("services", v1.delete_collection_namespaced_service, in_namespace,
         f"All services deleted in namespace {namespace}."),
        ("jobs", batch_v1.delete_collection_namespaced_job, in_namespace,
         f"All jobs deleted in namespace {namespace}."),
        ("pods", v1.delete_collection_namespaced_pod, in_namespace,
         f"All pods deleted in namespace {namespace}."),
        ("statefulsets", apps_v1.delete_collection_namespaced_stateful_set, in_namespace,
         f"All statefulsets deleted in namespace {namespace}."),
        ("PVCs", v1.delete_collection_namespaced_persistent_volume_claim, in_namespace,
         f"All PVCs deleted in namespace {namespace}."),
        # Cluster-scoped: no namespace argument.
        ("PVs", v1.delete_collection_persistent_volume, {},
         "All PVs deleted in the cluster."),
    )

    for label, delete_collection, kwargs, success_message in steps:
        try:
            delete_collection(**kwargs)
        except ApiException as e:
            # Keep going so one failing kind does not block the others.
            print(f"Failed to delete {label}: {e}")
        else:
            print(success_message)

    print("NOTE: Please wait ~ 1 minute for all pods to terminate")
def create_reset_results_log():
    """Ensure the ResultsHub log directory exists and is empty.

    The directory is taken from ``results-hub.local-pv-path`` in
    ``./J2K_CONFIG.json``. If the path does not exist it is created
    (including parents). Otherwise every entry directly inside it is removed:
    files and symlinks are unlinked, directories are removed recursively.
    Failures are reported on stdout and do not stop the remaining entries
    from being processed.
    """
    with open("./J2K_CONFIG.json", 'r') as cfg_file:
        log_dir = json.load(cfg_file)['results-hub']['local-pv-path']

    # Nothing to clear yet: create the directory and stop.
    if not os.path.exists(log_dir):
        try:
            os.makedirs(log_dir)
            print(f'Directory created: {log_dir}')
        except OSError as err:
            print(f'Failed to create directory {log_dir}. Reason: {err}')
        return

    # Empty the directory but keep the directory itself in place.
    for entry in os.listdir(log_dir):
        target = os.path.join(log_dir, entry)
        try:
            if os.path.isfile(target) or os.path.islink(target):
                os.unlink(target)
            elif os.path.isdir(target):
                shutil.rmtree(target)
        except Exception as err:
            print(f'Failed to delete {target}. Reason: {err}')
def wait_for_jobs(poll_interval=1):
    """Block until no Job in the configured namespace reports active pods.

    The namespace is read from ``results-hub.namespace`` in
    ``./J2K_CONFIG.json`` and the cluster is reached through the local
    kubeconfig. The Job list is polled repeatedly; polling stops on the first
    poll in which no job has a truthy ``status.active``. A job with no active
    pods counts as finished regardless of its conditions.

    Args:
        poll_interval: Seconds to sleep between polls. Defaults to 1, the
            value that used to be hard-coded.

    Returns:
        bool: ``True`` if, on the final poll, no job carried a ``Failed``
        condition whose status is ``"True"``; ``False`` otherwise. The same
        outcome is also printed to stdout.
    """
    with open("./J2K_CONFIG.json", 'r') as file:
        j2k_config = json.load(file)
    namespace = j2k_config['results-hub']['namespace']

    config.load_kube_config()
    batch_v1 = client.BatchV1Api()

    while True:
        jobs = batch_v1.list_namespaced_job(namespace).items

        # A condition only applies while its status is "True"; matching on the
        # type alone would also count a Failed condition that is not in effect.
        any_failed = any(
            condition.type == 'Failed' and condition.status == 'True'
            for job in jobs
            for condition in (job.status.conditions or ())
        )

        # Done once no job has pods that are still active.
        if not any(job.status.active for job in jobs):
            break

        time.sleep(poll_interval)

    if any_failed:
        print("Some jobs have failed, stop waiting!")
    else:
        print("All jobs have completed successfully.")
    return not any_failed
def clear_codegen_outputs():
    """Remove the codegen output directory and everything beneath it.

    The directory is taken from ``execution.output-directory`` in
    ``./J2K_CONFIG.json``. If it is not an existing directory a notice is
    printed and nothing is removed. Removal errors are reported on stdout
    rather than raised.
    """
    with open("./J2K_CONFIG.json", 'r') as cfg_file:
        settings = json.load(cfg_file)
    target_dir = settings['execution']['output-directory']

    # Bail out early when there is no directory to remove.
    if not os.path.exists(target_dir) or not os.path.isdir(target_dir):
        print(f"The specified directory does not exist: {target_dir}")
        return

    try:
        shutil.rmtree(target_dir)
        print(f"Successfully removed directory: {target_dir}")
    except Exception as err:
        print(f"Failed to remove directory: {target_dir}. Reason: {err}")
def reset_execution():
    """Run the full reset sequence.

    Calls, in order, ``delete_all_resources``, ``create_reset_results_log``
    and ``clear_codegen_outputs``, then prints a completion message.
    """
    reset_steps = (
        delete_all_resources,
        create_reset_results_log,
        clear_codegen_outputs,
    )
    for run_step in reset_steps:
        run_step()
    print("Execution reset complete.")
def list_pods_by_node():
    """Print the pods of the configured namespace grouped by node name.

    The namespace is read from ``results-hub.namespace`` in
    ``./J2K_CONFIG.json`` and the cluster is reached through the local
    kubeconfig. For each distinct ``spec.node_name`` a ``Node name <node>:``
    header is printed, followed by one pod name per line. Nodes appear in the
    order they are first encountered in the pod list.
    """
    with open("./J2K_CONFIG.json", 'r') as cfg_file:
        settings = json.load(cfg_file)
    namespace = settings['results-hub']['namespace']

    config.load_kube_config()
    core_api = client.CoreV1Api()

    # node name -> names of the pods whose spec.node_name is that node
    pod_names_by_node = {}
    for pod in core_api.list_namespaced_pod(namespace).items:
        pod_names_by_node.setdefault(pod.spec.node_name, []).append(pod.metadata.name)

    for node, pod_names in pod_names_by_node.items():
        print(f"Node name {node}:")
        for pod_name in pod_names:
            print(pod_name)
def main():
    """Show the toolbox menu, read one choice from stdin and run it.

    Choices:
        1 - delete all cluster resources in the J2K namespace
        2 - reset the ResultsHub log directory
        3 - wait for all jobs to finish
        4 - clear the codegen outputs
        5 - reset execution (1, 2 and 4 combined)
        6 - list pods grouped by node
        7 - exit

    Any other input prints ``Invalid input!``. Whitespace around the typed
    choice is ignored.
    """
    print("\nChoose a tool from the following:")
    print("1. Delete all cluster resources in the J2K namespace")
    print("2. Reset ResultsHub's logs")
    print("3. Wait for all jobs to finish")
    print("4. Clear all codegen outputs")
    print("5. Reset execution (combines choices 1, 2, and 4)")
    print("6. List nodes and the pods on them")
    print("7. Exit")

    # The menu has seven entries, so the prompt must advertise 1-7; stray
    # spaces around the answer should not turn a valid choice into an error.
    choice = input("Enter your choice (1-7): ").strip()

    if choice == '1':
        delete_all_resources()
    elif choice == '2':
        create_reset_results_log()
    elif choice == '3':
        wait_for_jobs()
    elif choice == '4':
        clear_codegen_outputs()
    elif choice == '5':
        reset_execution()
    elif choice == '6':
        list_pods_by_node()
    elif choice == '7':
        print("Exiting toolbox.")
    else:
        print("Invalid input!")
# Show the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()