diff --git a/optimize_lipids.py b/optimize_lipids.py
index b13573c..f8be546 100755
--- a/optimize_lipids.py
+++ b/optimize_lipids.py
@@ -135,7 +135,8 @@
             max_found_checkpoint_nb = int(filename.split('.')[0].split('_')[2])
             if max_found_checkpoint_nb > fstpso_checkpoint_in_nb:
                 fstpso_checkpoint_in_nb = max_found_checkpoint_nb
 
-    if fstpso_checkpoint_in_nb > 0 and not ns.user_config['next_cycle']:
+    # if fstpso_checkpoint_in_nb > 0 and not ns.user_config['next_cycle']:
+    if fstpso_checkpoint_in_nb > 0:  # mid-2022 next_cycle is not used anymore
         fstpso_checkpoint_in = f'fstpso_checkpoint_{fstpso_checkpoint_in_nb}.obj'
         print('Loading FST-PSO checkpoint file:', fstpso_checkpoint_in, '\n')
@@ -156,18 +157,19 @@
     # it is possible that there is no FST-PSO checkpoint but the exec directory still exists
     # this happens when the opti did not reach the end of SWARM iter 2)
     if n_cycle_1 is not None:
-        if fstpso_checkpoint_in is not None or ns.user_config['next_cycle']:
+        # if fstpso_checkpoint_in is not None or ns.user_config['next_cycle']:
+        if fstpso_checkpoint_in is not None:  # mid-2022 next_cycle is not used anymore
             try:
                 n_cycle = n_cycle_1  # continue within same opti cycle
                 n_swarm_iter = n_swarm_iter_1 + 1  # continue with the next swarm iteration
                 n_particle = n_particle_1
 
                 # if we want to do a calibrated re-initialization of the swarm from best results of the previous opti cycle
-                if ns.user_config['next_cycle']:
-                    n_cycle += 1
-                    print('--> Going for a calibrated restart in a new optimization cycle')
-                else:
-                    print('--> Going to continue an on-going optimization cycle')
+                # if ns.user_config['next_cycle']:
+                #     n_cycle += 1
+                #     print('--> Going for a calibrated restart in a new optimization cycle')
+                # else:
+                print('--> Going to continue an on-going optimization cycle')
             except ValueError:  # means we have just read the headers (not sure this below is relevant anymore)
                 n_swarm_iter = 1  # default start without checkpoints
@@ -940,6 +942,7 @@
         geom_grp_std = 'Unknown'
 
     # DISABLED BLOCK BELOW SO THAT WE ALWAYS START FROM ALL THE VALUES PRESENT IN THE CONFIG FILE
+    ns.params_val[geom_grp]['ref_eq_val'] = ns.user_config['init_bonded'][geom_grp]['val']
     # by default we start from the values in the config file, and we instead start from the AA average if the parameter has to be tuned
     # in case we have no AA reference data, then we fall back to the values in the config file
     # if geom_grp in ns.user_config['tune_bonds_equi_val'] and geom_grp_avg != 'Unknown':
@@ -987,6 +990,7 @@
         geom_grp_std = 'Unknown'
 
     # DISABLED BLOCK BELOW SO THAT WE ALWAYS START FROM ALL THE VALUES PRESENT IN THE CONFIG FILE
+    ns.params_val[geom_grp]['ref_eq_val'] = ns.user_config['init_bonded'][geom_grp]['val']
    # by default we start from the values in the config file, and we instead start from the AA average if the parameter has to be tuned
     # in case we have no AA reference data, then we fall back to the values in the config file
     # if geom_grp in ns.user_config['tune_angles_equi_val'] and geom_grp_avg != 'Unknown':
@@ -1116,7 +1120,7 @@
 # somehow long cycles with calibrated restarts using ALL parameters
 # (= no selection of bonds/angles/dihedrals/whatever like in Swarm-CG bonded version)
 opti_cycles = {
-    1: {'sim_time': ns.user_config['cg_time_prod'], 'cg_sampling': 4 / 3 * ns.user_config['cg_time_prod'], 'max_sw_iter': 50, 'max_sw_iter_no_new_best': 5}
+    1: {'sim_time': ns.user_config['cg_time_prod'], 'cg_sampling': 4 / 3 * ns.user_config['cg_time_prod'], 'max_sw_iter': 50, 'max_sw_iter_no_new_best': 10}
 }
 
 # for tests without opti cycles
diff --git a/shared/eval_func_parallel.py b/shared/eval_func_parallel.py
index 7336623..f0dd8f2 100755
--- a/shared/eval_func_parallel.py
+++ b/shared/eval_func_parallel.py
@@ -17,7 +17,7 @@
 from statistics import mean
 import multiprocessing
 from shared.io import backup_swarm_iter_logs_and_checkpoint
-from shared.simulations import run_parallel, init_process
+from shared.simulations import run_parallel
 
 
 def create_files_and_dirs_for_swarm_iter(ns, parameters_sets, nb_eval_particles_range_over_complete_opti):
@@ -249,11 +249,13 @@ def eval_function_parallel_swarm(parameters_sets, args):
             delta_ts_master_remaining_h = 'INFINITE'
         print(f'Starting new SWARM iteration running in LOCAL (= without SLURM) using {ns.nb_slots} slots')
 
-        slots_states = multiprocessing.Array('i', ns.nb_slots, lock=True)
-        for i in range(ns.nb_slots):  # multiprocessing.Array does NOT like list comprehension
-            slots_states[i] = 1  # mark slot as available
+        # NOTE: this is NOT ENOUGH to only have lock=True here and array access has to be also managed with get_lock() or something
+        # slots_states = multiprocessing.Array('i', ns.nb_slots, lock=True)
+        # for i in range(ns.nb_slots):  # multiprocessing.Array does NOT like list comprehension
+        #     slots_states[i] = 1  # mark slot as available
 
-        with multiprocessing.Pool(processes=ns.nb_slots, initializer=init_process, initargs=(slots_states,)) as pool:
+        # with multiprocessing.Pool(processes=ns.nb_slots, initializer=init_process, initargs=(slots_states,)) as pool:
+        with multiprocessing.Pool(processes=ns.nb_slots) as pool:
             p_args = zip(repeat(ns), p_job_exec_dir, p_nb_eval_particle, p_lipid_code, p_temp)
             p_res = pool.starmap(run_parallel, p_args)
             p_time_start_str, p_time_end_str, p_time_elapsed_str = list(map(list, zip(*p_res)))
diff --git a/shared/simulations.py b/shared/simulations.py
index 61561f6..aa05b57 100755
--- a/shared/simulations.py
+++ b/shared/simulations.py
@@ -6,6 +6,7 @@
 from datetime import datetime
 from shared.context_managers import working_dir
 import config
+import multiprocessing
 
 
 # build gromacs command with arguments
@@ -139,31 +140,50 @@ def run_sims(ns, slot_nt, slot_gpu_id):
 # for making a shared multiprocessing.Array() to handle slots states when running simulations in LOCAL (= NOT HPC)
-def init_process(arg):
-    global g_slots_states
-    g_slots_states = arg
+# def init_process(arg):
+#     global g_slots_states
+#     g_slots_states = arg
 
 
 def run_parallel(ns, job_exec_dir, nb_eval_particle, lipid_code, temp):
-    while True:
-        time.sleep(1)
-        for i in range(len(g_slots_states)):
-            if g_slots_states[i] == 1:  # if slot is available
-
-                g_slots_states[i] = 0  # mark slot as busy
-                print(f'    Starting simulation for particle {nb_eval_particle} {lipid_code} {temp} on slot {i + 1}')
-                slot_nt = ns.slots_nts[i]
-                slot_gpu_id = ns.slots_gpu_ids[i]
-                # print(f'    Slot uses -nt {slot_nt} and -gpu_id {slot_gpu_id} and in directory {job_exec_dir}')
-                with working_dir(job_exec_dir):
-                    gmx_time = run_sims(ns, slot_nt, slot_gpu_id)
-                g_slots_states[i] = 1  # mark slot as available
-                # print(f'Finished simulation for particle {nb_eval_particle} with {lipid_code} {temp} on slot {i + 1}')
-
-                time_start_str, time_end_str = '', ''  # NOTE: this is NOT displayed anywhere atm & we don't care much
-                time_elapsed_str = time.strftime('%H:%M:%S', time.gmtime(round(gmx_time)))
-
-                return time_start_str, time_end_str, time_elapsed_str
+    subprocess_slot_id = multiprocessing.current_process()._identity[0] - 1  # 0-indexed (originally _identity is 1-indexed)
+    print(f'    Starting simulation for particle {nb_eval_particle} {lipid_code} {temp} on slot {subprocess_slot_id % len(ns.slots_nts) + 1}')
+    slot_nt = ns.slots_nts[subprocess_slot_id % len(ns.slots_nts)]
+    slot_gpu_id = ns.slots_gpu_ids[subprocess_slot_id % len(ns.slots_nts)]
+    # print(f'    Slot uses -nt {slot_nt} and -gpu_id {slot_gpu_id} and in directory {job_exec_dir}')
+    with working_dir(job_exec_dir):
+        gmx_time = run_sims(ns, slot_nt, slot_gpu_id)
+    time_start_str, time_end_str = '', ''  # NOTE: this is NOT displayed anywhere atm & we don't care much
+    time_elapsed_str = time.strftime('%H:%M:%S', time.gmtime(round(gmx_time)))
+    return time_start_str, time_end_str, time_elapsed_str
+
+
+    # while True:
+    #     time.sleep(2)
+    #     start_with_slot_id = None
+    #
+    #     with g_slots_states.get_lock():
+    #         for i in range(len(g_slots_states)):
+    #             if g_slots_states[i] == 1:  # if slot is available
+    #                 start_with_slot_id = i
+    #                 g_slots_states[start_with_slot_id] = 0  # mark slot as busy
+    #                 print(f'    Starting simulation for particle {nb_eval_particle} {lipid_code} {temp} on slot {start_with_slot_id + 1}')
+    #                 break
+    #
+    #     if start_with_slot_id is not None:
+    #         slot_nt = ns.slots_nts[start_with_slot_id]
+    #         slot_gpu_id = ns.slots_gpu_ids[start_with_slot_id]
+    #         # print(f'    Slot uses -nt {slot_nt} and -gpu_id {slot_gpu_id} and in directory {job_exec_dir}')
+    #         with working_dir(job_exec_dir):
+    #             gmx_time = run_sims(ns, slot_nt, slot_gpu_id)
+    #
+    #         with g_slots_states.get_lock():
+    #             g_slots_states[start_with_slot_id] = 1  # mark slot as available
+    #         # print(f'Finished simulation for particle {nb_eval_particle} with {lipid_code} {temp} on slot {i + 1}')
+    #
+    #         time_start_str, time_end_str = '', ''  # NOTE: this is NOT displayed anywhere atm & we don't care much
+    #         time_elapsed_str = time.strftime('%H:%M:%S', time.gmtime(round(gmx_time)))
+    #         return time_start_str, time_end_str, time_elapsed_str
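
For reference, below is a minimal standalone sketch of the worker-to-slot mapping that the new run_parallel() relies on: each pool worker derives its slot index from multiprocessing.current_process()._identity instead of polling a shared, lock-protected slots array. The slot lists and the fake_task() function here are hypothetical, for illustration only, and are not taken from the repository config.

import multiprocessing

# Hypothetical slot definitions for illustration only: one -nt / -gpu_id pair per local slot.
SLOTS_NTS = [4, 4, 8]
SLOTS_GPU_IDS = [0, 1, 0]

def fake_task(task_id):
    # Pool workers carry a 1-indexed identity tuple, e.g. (1,), (2,), ...;
    # shift to 0-indexed and wrap around the number of slots, mirroring run_parallel().
    slot_id = (multiprocessing.current_process()._identity[0] - 1) % len(SLOTS_NTS)
    return task_id, slot_id, SLOTS_NTS[slot_id], SLOTS_GPU_IDS[slot_id]

if __name__ == '__main__':
    with multiprocessing.Pool(processes=len(SLOTS_NTS)) as pool:
        for task_id, slot_id, nt, gpu_id in pool.map(fake_task, range(6)):
            print(f'task {task_id} -> slot {slot_id + 1} (-nt {nt}, -gpu_id {gpu_id})')

Because the pool is created with as many processes as there are slots, each worker maps to a distinct slot without any shared state; the modulo only keeps the index in range if worker identities ever exceed the slot count (e.g. after worker respawns).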