From a584304d9cea3ed173887865db409310e4e464d5 Mon Sep 17 00:00:00 2001
From: "Michael B. McCoy"
Date: Wed, 23 Nov 2022 15:43:41 -0800
Subject: [PATCH 1/2] Avoid mutating list while iterating.

Iterates over a copy of `subresults` and `jobresults` so `.remove` calls on
the lists do not mutate the list we are iterating over.

This change arises from debugging why the number of jobs processing was
consistently lower than the number of processing threads. In short, it seems
that modifying the `jobresults` list (`jobresults.remove(...)`) caused the
list to consistently overestimate the number of remaining jobs. In my case,
I was using 12 threads and the process always reported
`Still waiting for 8 Jobs` on each loop.

This seems to occur because of the mutation of the `jobresults` list.
Although the behavior of calling `.remove` on a list while iterating over it
is not well defined in Python, running the following on my local machine
demonstrates that it is likely to cause problems:

```
>>> x = list(range(12))
>>> for ii, v in enumerate(x):
...     print(ii, v)
...     x.remove(v)
...
0 0
1 2
2 4
3 6
4 8
5 10
>>> x
[1, 3, 5, 7, 9, 11]
```

Every other element is skipped by the loop, so entries that should have been
removed remain in the list.

Tests on a running instance also suggest that this patch fixes the issue.
--- net/process_submissions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/process_submissions.py b/net/process_submissions.py index 19bdf39a1..b510bd3e4 100644 --- a/net/process_submissions.py +++ b/net/process_submissions.py @@ -868,7 +868,7 @@ def main(dojob_nthreads, dosub_nthreads, refresh_rate, max_sub_retries, if subresults != lastsubs: print('Submissions running:', len(subresults)) lastsubs = subresults - for sid,res in subresults: + for sid,res in subresults[:]: print(' Submission id', sid, 'ready:', res.ready(), end=' ') if res.ready(): subresults.remove((sid,res)) @@ -887,7 +887,7 @@ def main(dojob_nthreads, dosub_nthreads, refresh_rate, max_sub_retries, print('Jobs running:', len(jobresults)) lastjobs = jobresults any_jobs_finished = False - for jid,res in jobresults: + for jid,res in jobresults[:]: print(' Job id', jid, 'ready:', res.ready(), end=' ') if res.ready(): any_jobs_finished = True From 49e784722a301188ef7f2446023f8527feb9f188 Mon Sep 17 00:00:00 2001 From: "Michael B. McCoy" Date: Wed, 23 Nov 2022 15:59:03 -0800 Subject: [PATCH 2/2] Copy `newuis` too. --- net/process_submissions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/process_submissions.py b/net/process_submissions.py index b510bd3e4..9d9d6aef3 100644 --- a/net/process_submissions.py +++ b/net/process_submissions.py @@ -991,7 +991,7 @@ def main(dojob_nthreads, dosub_nthreads, refresh_rate, max_sub_retries, iu = np.random.randint(len(users)) user = users[iu] print('Selected user', user) - for ui in newuis: + for ui in newuis[:]: if ui.user == user: print('Selected ui', ui) newuis.remove(ui)