#!/usr/bin/env perl # # Copyright (C) 2012-2013 Genome Research Ltd. # # Author: Petr Danecek # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. use strict; use warnings; use Carp; use FindBin; use lib "$FindBin::Bin"; use Getopt::Long; use File::Temp qw/ tempfile tempdir /; use IO::Handle; my $opts = parse_params(); test_view($opts,0); test_view($opts,4); test_vcf_api($opts,out=>'test-vcf-api.out'); test_vcf_sweep($opts,out=>'test-vcf-sweep.out'); test_vcf_various($opts); test_bcf_sr_sort($opts); test_command($opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out'); test_convert_padded_header($opts); test_rebgzip($opts); test_logging($opts); test_realn($opts); print "\nNumber of tests:\n"; printf " total .. %d\n", $$opts{nok}+$$opts{nfailed}; printf " passed .. %d\n", $$opts{nok}; printf " failed .. %d\n", $$opts{nfailed}; print "\n"; exit ($$opts{nfailed} > 0); #-------------------- sub error { my (@msg) = @_; if ( scalar @msg ) { confess @msg; } print "About: samtools/htslib consistency test script\n", "Usage: test.pl [OPTIONS]\n", "Options:\n", " -r, --redo-outputs Recreate expected output files.\n", " -t, --temp-dir When given, temporary files will not be removed.\n", " -f, --fail-fast Fail-fast mode: exit as soon as a test fails.\n", " -h, -?, --help This help message.\n", "\n"; exit 1; } sub cygpath { my ($path) = @_; $path = `cygpath -m $path`; $path =~ s/\r?\n//; return $path } sub safe_tempdir { my $dir = tempdir(CLEANUP=>1); if ($^O =~ /^msys/) { $dir = cygpath($dir); } return $dir; } sub parse_params { my $opts = { keep_files=>0, nok=>0, nfailed=>0 }; my $help; Getopt::Long::Configure('bundling'); my $ret = GetOptions ( 't|temp-dir:s' => \$$opts{keep_files}, 'r|redo-outputs' => \$$opts{redo_outputs}, 'f|fail-fast' => \$$opts{fail_fast}, 'h|?|help' => \$help ); if ( !$ret or $help ) { error(); } $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : safe_tempdir(); if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); } $$opts{path} = $FindBin::RealBin; $$opts{bin} = $FindBin::RealBin; $$opts{bin} =~ s{/test/?$}{}; if ($^O =~ /^msys/) { $$opts{path} = cygpath($$opts{path}); $$opts{bin} = cygpath($$opts{bin}); } return $opts; } sub _cmd { my ($cmd) = @_; my $kid_io; my @out; my $pid = open($kid_io, "-|"); if ( !defined $pid ) { error("Cannot fork: $!"); } if ($pid) { # parent @out = <$kid_io>; close($kid_io); } else { # Example of how to embed Valgrind into the testing framework. # TEST_PRECMD="valgrind --leak-check=full --suppressions=$ENV{HOME}/valgrind.supp" make check $cmd = "$ENV{TEST_PRECMD} $cmd" if exists $ENV{TEST_PRECMD}; # child exec('bash', '-o','pipefail','-c', $cmd) or error("Cannot execute the command [/bin/sh -o pipefail -c $cmd]: $!"); } return ($? >> 8, join('',@out)); } sub cmd { my ($cmd) = @_; my ($ret,$out) = _cmd($cmd); if ( $ret ) { error("The command failed [$ret]: $cmd\n", $out); } return $out; } sub test_cmd { my ($opts,%args) = @_; if ( !exists($args{out}) ) { if ( !exists($args{in}) ) { error("FIXME: expected out or in key\n"); } $args{out} = "$args{in}.out"; } my ($package, $filename, $line, $test)=caller(1); $test =~ s/^.+:://; print "$test:\n"; print "\t$args{cmd}\n"; my ($ret,$out) = _cmd("$args{cmd}"); if ( $ret ) { failed($opts,$test); return; } if ( $$opts{redo_outputs} && -e "$$opts{path}/$args{out}" ) { rename("$$opts{path}/$args{out}","$$opts{path}/$args{out}.old"); open(my $fh,'>',"$$opts{path}/$args{out}") or error("$$opts{path}/$args{out}: $!"); print $fh $out; close($fh); my ($ret,$out) = _cmd("cmp $$opts{path}/$args{out} $$opts{path}/$args{out}.old"); if ( !$ret && $out eq '' ) { unlink("$$opts{path}/$args{out}.old"); } else { print "\tthe expected output changed, saving:\n"; print "\t old .. $$opts{path}/$args{out}.old\n"; print "\t new .. $$opts{path}/$args{out}\n"; } } my $exp = ''; if ( open(my $fh,'<',"$$opts{path}/$args{out}") ) { my @exp = <$fh>; $exp = join('',@exp); $exp =~ s/\015?\012/\n/g; close($fh); } elsif ( !$$opts{redo_outputs} ) { failed($opts,$test,"$$opts{path}/$args{out}: $!"); return; } (my $out_lf = $out) =~ s/\015?\012/\n/g; if ( $exp ne $out_lf ) { open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new"); print $fh $out; close($fh); if ( !-e "$$opts{path}/$args{out}" ) { rename("$$opts{path}/$args{out}.new","$$opts{path}/$args{out}") or error("rename $$opts{path}/$args{out}.new $$opts{path}/$args{out}: $!"); print "\tthe file with expected output does not exist, creating new one:\n"; print "\t\t$$opts{path}/$args{out}\n"; } else { failed($opts,$test,"The outputs differ:\n\t\t$$opts{path}/$args{out}\n\t\t$$opts{path}/$args{out}.new"); } return; } passed($opts,$test); } sub failed { my ($opts,$test,$reason) = @_; $$opts{nfailed}++; print "\n"; STDOUT->flush(); if ( defined $reason ) { print STDERR "\t$reason\n"; } print STDERR ".. failed ...\n\n"; STDERR->flush(); if ($$opts{fail_fast}) { die "\n"; } } sub passed { my ($opts,$test) = @_; $$opts{nok}++; print ".. ok\n\n"; } sub is_file_newer { my ($afile,$bfile) = @_; my (@astat) = stat($afile) or return 0; my (@bstat) = stat($bfile) or return 0; if ( $astat[9]>$bstat[9] ) { return 1 } return 0; } # The tests -------------------------- my $test_view_failures; sub testv { my ($opts, $cmd) = @_; print " $cmd\n"; my ($ret, $out) = _cmd($cmd); if ($ret != 0) { STDOUT->flush(); print STDERR "FAILED\n$out\n"; STDERR->flush(); $test_view_failures++; if ($$opts{fail_fast}) { die "\n"; } } } sub test_view { my ($opts, $nthreads) = @_; my $tv_args = $nthreads ? "-\@$nthreads" : ""; foreach my $sam (glob("*#*.sam")) { my ($base, $ref) = ($sam =~ /((.*)#.*)\.sam/); $ref .= ".fa"; my $bam = "$base.tmp.bam"; my $cram = "$base.tmp.cram"; my $md = "-nomd"; if ($sam =~ /^md/) { $md = ""; } print "test_view testing $sam, ref $ref:\n"; $test_view_failures = 0; # SAM -> BAM -> SAM testv $opts, "./test_view $tv_args -S -b $sam > $bam"; testv $opts, "./test_view $tv_args $bam > $bam.sam_"; testv $opts, "./compare_sam.pl $sam $bam.sam_"; # SAM -> BAMu -> SAM testv $opts, "./test_view $tv_args -S -l0 -b $sam > $bam"; testv $opts, "./test_view $tv_args $bam > $bam.sam_"; testv $opts, "./compare_sam.pl $sam $bam.sam_"; # SAM -> CRAM2 -> SAM testv $opts, "./test_view $tv_args -t $ref -S -C -o VERSION=2.1 $sam > $cram"; testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; # BAM -> CRAM2 -> BAM -> SAM $cram = "$bam.cram"; testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=2.1 $bam > $cram"; testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam"; testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_"; testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_"; # SAM -> CRAM3u -> SAM $cram = "$base.tmp.cram"; testv $opts, "./test_view $tv_args -t $ref -S -l0 -C -o VERSION=3.0 $sam > $cram"; testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; # BAM -> CRAM3 -> BAM -> SAM $cram = "$bam.cram"; testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $bam > $cram"; testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam"; testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_"; testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_"; # CRAM3 -> CRAM2 $cram = "$base.tmp.cram"; testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=2.1 $cram > $cram.cram"; # CRAM2 -> CRAM3 testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $cram.cram > $cram"; # CRAM3 -> CRAM3 + multi-slice testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 -o seqs_per_slice=7 -o slices_per_container=5 $cram.cram > $cram"; testv $opts, "./test_view $tv_args $cram > $cram.sam_"; testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; # Java pre-made CRAM -> SAM my $jcram = "${base}_java.cram"; if (-e $jcram) { my $jsam = "${base}_java.tmp.sam_"; testv $opts, "./test_view $tv_args -i reference=$ref $jcram > $jsam"; testv $opts, "./compare_sam.pl -Baux $md $sam $jsam"; } if ($test_view_failures == 0) { passed($opts, "$sam conversions"); } else { failed($opts, "$sam conversions", "$test_view_failures subtests failed"); } } # BAM and CRAM range queries on prebuilt BAM and CRAM # The cram file has @SQ UR: set to point to an invalid location to # force the reference to be reloaded from the one given on the # command line and nowhere else. REF_PATH should also point to nowhere # (currently done by the Makefile). This is to test the refseq reference # counting and reload (Issue #654). my $regions = "CHROMOSOME_II:2980-2980 CHROMOSOME_IV:1500-1500 CHROMOSOME_II:2980-2980 CHROMOSOME_I:1000-1100"; testv $opts, "./test_view $tv_args -i reference=ce.fa range.cram $regions > range.tmp"; testv $opts, "./compare_sam.pl range.tmp range.out"; testv $opts, "./test_view $tv_args range.bam $regions > range.tmp"; testv $opts, "./compare_sam.pl range.tmp range.out"; } sub test_vcf_api { my ($opts,%args) = @_; test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-api $$opts{tmp}/test-vcf-api.bcf"); } sub test_vcf_sweep { my ($opts,%args) = @_; test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-sweep $$opts{tmp}/test-vcf-api.bcf"); } sub test_vcf_various { my ($opts, %args) = @_; # Excess spaces in header lines test_cmd($opts, %args, out => "test-vcf-hdr.out", cmd => "$$opts{bin}/htsfile -ch $$opts{path}/test-vcf-hdr-in.vcf"); # Various VCF parsing issues test_cmd($opts, %args, out => "formatcols.vcf", cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatcols.vcf"); test_cmd($opts, %args, out => "noroundtrip-out.vcf", cmd => "$$opts{bin}/htsfile -c $$opts{path}/noroundtrip.vcf"); test_cmd($opts, %args, out => "formatmissing-out.vcf", cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatmissing.vcf"); } sub write_multiblock_bgzf { my ($name, $frags) = @_; my $tmp = "$name.tmp"; open(my $out, '>', $name) || die "Couldn't open $name $!\n"; for (my $i = 0; $i < @$frags; $i++) { local $/; open(my $f, '>', $tmp) || die "Couldn't open $tmp : $!\n"; print $f $frags->[$i]; close($f) || die "Error writing to $tmp: $!\n"; open(my $bgz, '-|', "$$opts{bin}/bgzip -c $tmp") || die "Couldn't open pipe to bgzip: $!\n"; my $compressed = <$bgz>; close($bgz) || die "Error running bgzip\n"; if ($i < $#$frags) { # Strip EOF block $compressed =~ s/\x1f\x8b\x08\x04\x00{5}\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00{9}$//; } print $out $compressed; } close($out) || die "Error writing to $name: $!\n"; unlink($tmp); } sub test_rebgzip { my ($opts, %args) = @_; # Write a file that should match the one we ship my @frags = qw(1 22 333 4444 55555); my $mb = "$$opts{path}/bgziptest.txt.tmp.gz"; write_multiblock_bgzf($mb, \@frags); # See if it really does match my ($ret, $out) = _cmd("cmp $mb $$opts{path}/bgziptest.txt.gz"); if (!$ret && $out eq '') { # If it does, use the original test_cmd($opts, %args, out => "bgziptest.txt.gz", cmd => "$$opts{bin}/bgzip -I $$opts{path}/bgziptest.txt.gz.gzi -c -g $$opts{path}/bgziptest.txt"); } else { # Otherwise index the one we just made and test that print "test_rebgzip: Alternate zlib/deflate library detected\n"; cmd("$$opts{bin}/bgzip -I $mb.gzi -r $mb"); test_cmd($opts, %args, out => "bgziptest.txt.tmp.gz", cmd => "$$opts{bin}/bgzip -I $mb.gzi -c -g $$opts{path}/bgziptest.txt"); } } sub test_convert_padded_header { my ($opts, %args) = @_; $args{out} = "headernul.tmp.cram"; cmd("$$opts{path}/test_view -t ce.fa -C ce#1.sam > $args{out}"); foreach my $nuls (0, 1, 678) { my $nulsbam = "$$opts{tmp}/headernul$nuls.bam"; cmd("$$opts{path}/test_view -b -Z $nuls ce#1.sam > $nulsbam"); test_cmd($opts, %args, cmd => "$$opts{path}/test_view -t ce.fa -C $nulsbam"); } } sub test_bcf_sr_sort { my ($opts, %args) = @_; for (my $i=0; $i<10; $i++) { my $seed = int(rand(time)); my $test = 'test-bcf-sr'; my $cmd = "$$opts{path}/test-bcf-sr.pl -t $$opts{tmp} -s $seed"; print "$test:\n"; print "\t$cmd\n"; my ($ret,$out) = _cmd($cmd); if ( $ret ) { failed($opts,$test); } else { passed($opts,$test); } } } sub test_command { my ($opts, %args) = @_; my $cmd = "$$opts{path}/$args{cmd}"; test_cmd($opts, %args, cmd=>$cmd); } sub test_logging { my ($opts) = @_; my $test = 'test-logging'; my $cmd = "$$opts{path}/test-logging.pl"; print "$test:\n"; print "\t$cmd\n"; my ($ret,$out) = _cmd($cmd); if ( $ret ) { failed($opts,$test); } else { passed($opts,$test); } } sub test_realn { my ($opts) = @_; my $test_realn = "$$opts{path}/test_realn"; # Calculate BAQ test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp.sam"); test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn02.fa -i $$opts{path}/realn02.sam -o -", out => "realn02_exp.sam"); # Calculate and apply BAQ test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp-a.sam"); test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn02.fa -i $$opts{path}/realn02.sam -o -", out => "realn02_exp-a.sam"); # Calculate extended BAQ test_cmd($opts, cmd => "$test_realn -e -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp-e.sam"); test_cmd($opts, cmd => "$test_realn -e -f $$opts{path}/realn02.fa -i $$opts{path}/realn02.sam -o -", out => "realn02_exp-e.sam"); # Recalculate BAQ test_cmd($opts, cmd => "$test_realn -r -f $$opts{path}/realn02.fa -i $$opts{path}/realn02-r.sam -o -", out => "realn02_exp.sam"); # Apply from existing BQ tags test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn02.fa -i $$opts{path}/realn02_exp.sam -o -", out => "realn02_exp-a.sam"); # Revert quality values (using data in ZQ tags) test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn02.fa -i $$opts{path}/realn02_exp-a.sam -o -", out => "realn02_exp.sam"); }