Skip to content

Commit

Permalink
Merge branch 'release/0.202'
Browse files Browse the repository at this point in the history
  • Loading branch information
sb10 committed May 27, 2015
2 parents bcb1b27 + 0355f20 commit cf12878
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Build.PL
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use VRPipeBuild;

my $build = VRPipeBuild->new(
module_name => 'VRPipe',
dist_version => 0.201,
dist_version => 0.202,
dist_author => 'Vertebrate Resequencing group at the Sanger Institute',
dist_abstract => 'A system for setting up, running and tracking many jobs in pipelines on a compute farm.',
license => 'gpl',
Expand Down
1 change: 1 addition & 0 deletions HISTORY
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0.202 npg_cram_stats_parser step now stores md5_of_ref_seq_md5s
0.201 irods datasource methods fixed to all work with the new irods protocol
system.
0.200 delete_inputs step behaviour now checks that input files are not needed
Expand Down
1 change: 1 addition & 0 deletions modules/VRPipe/Schema/VRTrack.pm
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ class VRPipe::Schema::VRTrack with VRPipe::SchemaRole {
label => 'Header_Mistakes',
unique => [qw(uuid)],
required => [qw(num_mistakes)],
indexed => [qw(md5_of_ref_seq_md5s)],
allow_anything => 1
},

Expand Down
19 changes: 17 additions & 2 deletions modules/VRPipe/Steps/npg_cram_stats_parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class VRPipe::Steps::npg_cram_stats_parser with VRPipe::StepRole {
use VRPipe::FileProtocol;
use VRPipe::Parser;
use JSON::XS;
use Digest::MD5;

method options_definition {
return {
Expand Down Expand Up @@ -300,7 +301,7 @@ class VRPipe::Steps::npg_cram_stats_parser with VRPipe::StepRole {
my $header_lines = $cram_file->header_lines; # automagically works with irods files if HTSLIB has been compiled with irods support
my $props = $graph_file->properties(flatten_parents => 1);
my %rg_key_to_prop_key = (LB => ['vrtrack_library_id', 'vrtrack_library_name'], SM => 'vrtrack_sample_accession', DS => "vrtrack_study_$sample_id_type");
my (%diffs, $did_ref);
my (%diffs, $ref_md5s);
foreach (@$header_lines) {
if (/^\@RG/) {
while (my ($rg_key, $prop_keys) = each %rg_key_to_prop_key) {
Expand All @@ -326,13 +327,27 @@ class VRPipe::Steps::npg_cram_stats_parser with VRPipe::StepRole {

delete $diffs{$rg_key} if $ok;
}
last;
}
elsif (/^\@SQ.*\tM5:(\S+)/) {
# even though we know the reference it was supposed to be
# aligned to, the absolute path of that might differ from the
# absolute path in the SQ line, so instead we'll store an md5 of
# all the md5s of the sequences here, and later some auto qc step
# could check that the sequence-md5s for all crams in a study are
# the same (as each other, or as the expected fasta)
$ref_md5s .= $1;
}
}
if ($ref_md5s) {
my $dmd5 = Digest::MD5->new();
$dmd5->add($ref_md5s);
$ref_md5s = $dmd5->hexdigest;
}
$schema->add(
'Header_Mistakes',
{
num_mistakes => scalar keys %diffs,
$ref_md5s ? (md5_of_ref_seq_md5s => $ref_md5s) : (),
%diffs
},
incoming => { type => 'header_mistakes', node => $graph_file }
Expand Down
5 changes: 2 additions & 3 deletions scripts/vrpipe-disk_usage
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,13 @@ if ($disk) {
my %done_files;
foreach my $s (VRPipe::PipelineSetup->search({ output_root => { like => $disk . '%' } })) {
my $sid = $s->id;
print STDERR "\n$sid ";
next if $skip_setup{$sid};
my $sn = $s->name;

my $pager = VRPipe::StepOutputFile->search_paged({ 'stepstate.pipelinesetup' => $sid, 'stepstate.same_submissions_as' => undef, output_key => { '!=' => 'temp' }, }, { join => ['stepstate'], prefetch => 'file' }, 10000); # -or => [{e => 1, moved_to => undef}, {moved_to => { '!=' => undef} }] # join file, 'file.path' => { like => $disk.'%' } or where this applies to moved_to file id

while (my $sofs = $pager->next(no_resetting => 1)) {
print STDERR ", ";
print STDERR ". ";
foreach my $sof (@$sofs) {
my $f = $sof->file;
my $fid = $f->id;
Expand All @@ -68,10 +67,10 @@ if ($disk) {
next unless $p =~ /^$disk/;
my $mtime = $f->mtime || '';
print join("\t", $sid, $sn, $p, $f->type, $s, $mtime), "\n";
print STDERR "! ";
$done_files{$rid} = 1;
}
}
print STDERR "\n";
}
}
else {
Expand Down
7 changes: 4 additions & 3 deletions t/VRPipe/Pipelines/sequencing_qc_from_irods.t
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,17 @@ is_deeply $props,

$props = $mistakes->properties;
delete $props->{uuid};
is_deeply $props, { num_mistakes => 0 }, 'Header_Mistakes node had the correct properties';
is_deeply $props, { num_mistakes => 0, md5_of_ref_seq_md5s => 'f95dcc1c1300f59b028fba79f49878a4' }, 'Header_Mistakes node had the correct properties';

# and test the one we artificially made a mistake for
my ($fake_mistakes) = $wrong_lane->related(outgoing => { max_depth => 5, namespace => 'VRTrack', label => 'Header_Mistakes' });
$props = $fake_mistakes->properties;
delete $props->{uuid};
is_deeply $props,
{
num_mistakes => 1,
LB => ['13607731', '999']
num_mistakes => 1,
md5_of_ref_seq_md5s => 'f95dcc1c1300f59b028fba79f49878a4',
LB => ['13607731', '999']
},
'Header_Mistakes nodes can correctly show mistakes';

Expand Down

0 comments on commit cf12878

Please sign in to comment.