Skip to content

Commit

Permalink
Fix enqueueing of Minion jobs breaking PARALLEL_ONE_HOST_ONLY=1
Browse files Browse the repository at this point in the history
When restarting openQA jobs, additional Minion jobs are enqueued (of
`git_clone` task, this is happening especially often when `git_auto_clone`
is enabled). So far this didn't happen in a transaction. So the scheduler
might see the openQA jobs but not the Minion jobs they are blocked by. This
is problematic because the scheduler might assign jobs too soon to a
worker. This is especially problematic if it does not happen consistently
across a parallel job cluster as it then breaks the
`PARALLEL_ONE_HOST_ONLY=1` feature.

Related ticket: https://progress.opensuse.org/issues/169342
  • Loading branch information
Martchus committed Nov 6, 2024
1 parent 850ba1d commit 2dea55e
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 11 deletions.
6 changes: 0 additions & 6 deletions lib/OpenQA/Resource/Jobs.pm
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use Mojo::Base -strict, -signatures;

use OpenQA::Jobs::Constants;
use OpenQA::Schema;
use OpenQA::Utils qw(create_git_clone_list);
use Exporter 'import';

our @EXPORT_OK = qw(job_restart);
Expand Down Expand Up @@ -50,8 +49,6 @@ sub job_restart ($jobids, %args) {
my $jobs_rs = $schema->resultset('Jobs');
my $jobs = $jobs_rs->search({id => $jobids, state => {'not in' => [PRISTINE_STATES]}});
$duplication_args{no_directly_chained_parent} = 1 unless $force;
my %clones;
my @clone_ids;
while (my $job = $jobs->next) {
my $job_id = $job->id;
my $missing_assets = $job->missing_assets;
Expand All @@ -76,18 +73,15 @@ sub job_restart ($jobids, %args) {

my $cloned_job_or_error = $job->auto_duplicate(\%duplication_args);
if (ref $cloned_job_or_error) {
create_git_clone_list($job->settings_hash, \%clones);
push @duplicates, $cloned_job_or_error->{cluster_cloned};
push @comments, @{$cloned_job_or_error->{comments_created}};
push @clone_ids, $cloned_job_or_error->{cluster_cloned}->{$job_id};
}
else {
$res{enforceable} = 1 if index($cloned_job_or_error, 'Direct parent ') == 0;
push @errors, ($cloned_job_or_error // "An internal error occurred when duplicating $job_id");
}
push @processed, $job_id;
}
OpenQA::App->singleton->gru->enqueue_git_clones(\%clones, \@clone_ids) if keys %clones;

# abort running jobs
return \%res if $args{skip_aborting_jobs};
Expand Down
11 changes: 9 additions & 2 deletions lib/OpenQA/Schema/Result/Jobs.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use DateTime;
use OpenQA::Constants qw(WORKER_COMMAND_ABORT WORKER_COMMAND_CANCEL);
use OpenQA::Log qw(log_trace log_debug log_info log_warning log_error);
use OpenQA::Utils (
qw(parse_assets_from_settings locate_asset),
qw(create_git_clone_list parse_assets_from_settings locate_asset),
qw(resultdir assetdir read_test_modules find_bugref random_string),
qw(run_cmd_with_log_return_error needledir testcasedir gitrepodir find_video_files)
);
Expand Down Expand Up @@ -693,18 +693,25 @@ sub _create_clones ($self, $jobs, $comments, $comment_text, $comment_user_id, @c
$res->register_assets_from_settings;
}

my %git_clones;
my @clone_ids;
for my $original_job_id (@original_job_ids) {
my $cloned_job = $clones{$original_job_id};
# calculate blocked_by
$cloned_job->calculate_blocked_by;
# add a reference to the clone within $jobs
$jobs->{$original_job_id}->{clone} = $cloned_job->id;
push @clone_ids, $jobs->{$original_job_id}->{clone} = $cloned_job->id;
# add Git repositories to clone
create_git_clone_list($cloned_job->settings_hash, \%git_clones);
}

# create comments on original jobs
$result_source->schema->resultset('Comments')
->create_for_jobs(\@original_job_ids, $comment_text, $comment_user_id, $comments)
if defined $comment_text;

# enqueue Minion jobs to clone required Git repositories
OpenQA::App->singleton->gru->enqueue_git_clones(\%clones, \@clone_ids);
}

# internal (recursive) function for duplicate - returns hash of all jobs in the
Expand Down
2 changes: 1 addition & 1 deletion lib/OpenQA/Shared/Plugin/Gru.pm
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ sub enqueue_git_update_all ($self) {
}

sub enqueue_git_clones ($self, $clones, $job_ids) {
return unless %$clones;
return unless keys %$clones;
return unless OpenQA::App->singleton->config->{'scm git'}->{git_auto_clone} eq 'yes';
# $clones is a hashref with paths as keys and git urls as values
# $job_id is used to create entries in a related table (gru_dependencies)
Expand Down
5 changes: 3 additions & 2 deletions t/api/04-jobs.t
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ $ENV{MOJO_MAX_MESSAGE_SIZE} = 207741824;

my $t = client(Test::Mojo->new('OpenQA::WebAPI'));
my $cfg = $t->app->config;
$cfg->{'scm git'}->{git_auto_clone} = 'no';
$cfg->{'scm git'}->{git_auto_update} = 'no';
is $cfg->{audit}->{blocklist}, 'job_grab', 'blocklist updated';

Expand Down Expand Up @@ -1614,7 +1615,7 @@ subtest 'handle FOO_URL' => sub {
};

subtest 'handle git_clone with CASEDIR' => sub {
OpenQA::App->singleton->config->{'scm git'}->{git_auto_clone} = 'yes';
$cfg->{'scm git'}->{git_auto_clone} = 'yes';
$testsuites->create(
{
name => 'handle_foo_casedir',
Expand Down Expand Up @@ -1645,7 +1646,7 @@ subtest 'handle git_clone with CASEDIR' => sub {
};

subtest 'handle git_clone without CASEDIR' => sub {
OpenQA::App->singleton->config->{'scm git'}->{git_auto_update} = 'yes';
$cfg->{'scm git'}->{git_auto_update} = 'yes';
$testsuites->create(
{
name => 'handle_git_clone',
Expand Down

0 comments on commit 2dea55e

Please sign in to comment.