comparison samtools_view.xml @ 15:e41d3ce2ab9f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit e3de8bc1123bf4ce56818f2b7ad4b53080cb3bd8
author iuc
date Fri, 30 Aug 2024 10:24:13 +0000
parents e63aab0f18c6
children 17c2bd677389
comparison
equal deleted inserted replaced
14:e63aab0f18c6 15:e41d3ce2ab9f
1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@">
2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <token name="@REF_DATA@"> 5 <token name="@REF_DATA@">
6 ## additional reference data 6 ## additional reference data
134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag 134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag
135 #end if 135 #end if
136 #if $mode.filter_config.qname_file: 136 #if $mode.filter_config.qname_file:
137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file 137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file
138 #end if 138 #end if
139 #if str($cond_expr.select_expr) == "yes":
140 #set std_filters = $std_filters + " -e '%s'" % $cond_expr.expression
141 #end if
139 #end if 142 #end if
140 143
141 #if $with_subsampling: 144 #if $with_subsampling:
142 ## handle seed and fraction calculation for subsampling 145 ## handle seed and fraction calculation for subsampling
143 #import random 146 #import random
152 #if $input.is_of_type('sam') or $std_filters or $reg_filters: 155 #if $input.is_of_type('sam') or $std_filters or $reg_filters:
153 ## There is no index or we cannot use it because we are 156 ## There is no index or we cannot use it because we are
154 ## not dealing with all of the reads in the indexed 157 ## not dealing with all of the reads in the indexed
155 ## file. We have to do an extra pass over the input to 158 ## file. We have to do an extra pass over the input to
156 ## count the reads to subsample. 159 ## count the reads to subsample.
157 sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` && 160 sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {fac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", fac > 1 ? 1/fac : 1)}'` &&
158 #else: 161 #else:
159 ## We can get the count of reads to subsample using 162 ## We can get the count of reads to subsample using
160 ## an inexpensive call to idxstats. 163 ## an inexpensive call to idxstats.
161 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` && 164 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {fac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", fac > 1 ? 1/fac : 1)}'` &&
162 #end if 165 #end if
163 #end if 166 #end if
164 #end if 167 #end if
165 168
166 ## call samtools view 169 ## call samtools view
168 -@ \$addthreads 171 -@ \$addthreads
169 $fmtopt 172 $fmtopt
170 173
171 ## filter options (except regions filter, which is the last parameter) 174 ## filter options (except regions filter, which is the last parameter)
172 $std_filters 175 $std_filters
173
174 #if $with_subsampling: 176 #if $with_subsampling:
177 --subsample-seed $seed
175 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": 178 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target":
176 ##this is calculated at execution time before the main samtools command 179 ##this is calculated at execution time before the main samtools command
177 -s \${sample_fragment} 180 --subsample \${sample_fragment}
178 #else: 181 #else:
179 #set $fraction = $seed + 1 / float($mode.subsample_config.subsampling_mode.factor) 182 #set $fraction = 1 / float($mode.subsample_config.subsampling_mode.factor)
180 -s $fraction 183 --subsample $fraction
181 #end if 184 #end if
182 #end if 185 #end if
183 186
184 ## output options 187 ## output options
185 #if str($mode.output_options.reads_report_type) == 'count': 188 #if str($mode.output_options.reads_report_type) == 'count':
295 <when value="text"> 298 <when value="text">
296 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> 299 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
297 </when> 300 </when>
298 <when value="file"> 301 <when value="file">
299 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> 302 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
303 </when>
304 </conditional>
305 <conditional name="cond_expr">
306 <param name="select_expr" type="select" label="Filter by expression">
307 <option value="no" selected="True">No</option>
308 <option value="yes">Filter using an expression (see manual)</option>
309 </param>
310 <when value="no"/>
311 <when value="yes">
312 <param name="expression" type="text" argument="-e" label="Filter by expression - for example sclen&gt;0 will filter all soft clipped reads" help="See Samtools manual for Filter expression syntax">
313 <sanitizer invalid_char="">
314 <valid initial="string.printable">
315 <remove value=" "/>
316 <remove value="'"/>
317 <remove value='"'/>
318 </valid>
319 </sanitizer>
320 </param>
300 </when> 321 </when>
301 </conditional> 322 </conditional>
302 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> 323 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
303 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> 324 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" />
304 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> 325 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
396 <data name="outputcnt" format="tabular" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts"> 417 <data name="outputcnt" format="tabular" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts">
397 <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter> 418 <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter>
398 </data> 419 </data>
399 </outputs> 420 </outputs>
400 <tests> 421 <tests>
401 <!-- 1) sam to bam (copied from the sam_to_bam tool) --> 422 <!-- 1) sam to bam (copied from the sam_to_bam tool) -->
402 <test> 423 <test expect_num_outputs="1">
403 <param name="input" ftype="sam" value="in_test_1.sam" /> 424 <param name="input" ftype="sam" value="in_test_1.sam" />
404 <output name="outputsam" ftype="bam" file="test_1.bam" lines_diff="4" /> 425 <output name="outputsam" ftype="bam" file="test_1.bam" lines_diff="4" />
405 </test> 426 </test>
406 <!-- 2) --> 427 <!-- 2) -->
407 <test> 428 <test expect_num_outputs="1">
408 <param name="input" ftype="sam" dbkey="equCab2" value="in_test_1.sam" /> 429 <param name="input" ftype="sam" dbkey="equCab2" value="in_test_1.sam" />
409 <conditional name="addref_cond"> 430 <conditional name="addref_cond">
410 <param name="addref_select" value="cached" /> 431 <param name="addref_select" value="cached" />
411 <param name="ref" value="equCab2chrM" /> 432 <param name="ref" value="equCab2chrM" />
412 </conditional> 433 </conditional>
413 <output name="outputsam" ftype="bam" file="test_2.bam" lines_diff="4" /> 434 <output name="outputsam" ftype="bam" file="test_2.bam" lines_diff="4" />
414 </test> 435 </test>
415 <!-- 3) --> 436 <!-- 3) -->
416 <test> 437 <test expect_num_outputs="1">
417 <param name="input" ftype="sam" value="in_test_3.sam" /> 438 <param name="input" ftype="sam" value="in_test_3.sam" />
418 <conditional name="addref_cond"> 439 <conditional name="addref_cond">
419 <param name="addref_select" value="history" /> 440 <param name="addref_select" value="history" />
420 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" /> 441 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" />
421 </conditional> 442 </conditional>
422 <output name="outputsam" ftype="bam" file="test_3.bam" lines_diff="4" /> 443 <output name="outputsam" ftype="bam" file="test_3.bam" lines_diff="4" />
423 </test> 444 </test>
424 <!-- 4) cram to bam --> 445 <!-- 4) cram to bam -->
425 <test> 446 <test expect_num_outputs="1">
426 <param name="input" value="in_test_4.cram" ftype="cram" /> 447 <param name="input" value="in_test_4.cram" ftype="cram" />
427 <conditional name="addref_cond"> 448 <conditional name="addref_cond">
428 <param name="addref_select" value="history" /> 449 <param name="addref_select" value="history" />
429 <param name="ref" value="test.fa" /> 450 <param name="ref" value="test.fa" />
430 </conditional> 451 </conditional>
431 <output name="outputsam" file="test_4.bam" ftype="bam" lines_diff="4" /> 452 <output name="outputsam" file="test_4.bam" ftype="bam" lines_diff="4" />
432 </test> 453 </test>
433 <!-- 5) within bam operations expected to result in sorting or not --> 454 <!-- 5) within bam operations expected to result in sorting or not -->
434 <test > 455 <test expect_num_outputs="1">
435 <!-- sorted bam should always result in unmodified output --> 456 <!-- sorted bam should always result in unmodified output -->
436 <param name="input" ftype="bam" value="in_test_5.bam" /> 457 <param name="input" ftype="bam" value="in_test_5.bam" />
437 <assert_command> 458 <assert_command>
438 <not_has_text text="samtools sort" /> 459 <not_has_text text="samtools sort" />
439 </assert_command> 460 </assert_command>
440 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> 461 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/>
441 </test> 462 </test>
442 <!-- 6) --> 463 <!-- 6) -->
443 <test> 464 <test expect_num_outputs="1">
444 <!-- sorted bam should always result in unmodified output --> 465 <!-- sorted bam should always result in unmodified output -->
445 <param name="input" ftype="bam" value="in_test_5.bam" /> 466 <param name="input" ftype="bam" value="in_test_5.bam" />
446 <conditional name="mode"> 467 <conditional name="mode">
447 <conditional name="output_options"> 468 <conditional name="output_options">
448 <conditional name="output_format"> 469 <conditional name="output_format">
454 <not_has_text text="samtools sort" /> 475 <not_has_text text="samtools sort" />
455 </assert_command> 476 </assert_command>
456 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> 477 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/>
457 </test> 478 </test>
458 <!-- 7) --> 479 <!-- 7) -->
459 <test> 480 <test expect_num_outputs="1">
460 <!-- qname_sorted.bam should get sorted during "conversion" to bam ... --> 481 <!-- qname_sorted.bam should get sorted during "conversion" to bam ... -->
461 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> 482 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" />
462 <assert_command> 483 <assert_command>
463 <has_text text="samtools sort" /> 484 <has_text text="samtools sort" />
464 </assert_command> 485 </assert_command>
465 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> 486 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" />
466 </test> 487 </test>
467 <!-- 8) --> 488 <!-- 8) -->
468 <test> 489 <test expect_num_outputs="1">
469 <!-- ... but should be emitted unmodified when using input format --> 490 <!-- ... but should be emitted unmodified when using input format -->
470 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> 491 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" />
471 <conditional name="mode"> 492 <conditional name="mode">
472 <conditional name="output_options"> 493 <conditional name="output_options">
473 <conditional name="output_format"> 494 <conditional name="output_format">
479 <not_has_text text="samtools sort" /> 500 <not_has_text text="samtools sort" />
480 </assert_command> 501 </assert_command>
481 <output name="outputsam" ftype="qname_sorted.bam" file="test_8.bam" lines_diff="2"/> 502 <output name="outputsam" ftype="qname_sorted.bam" file="test_8.bam" lines_diff="2"/>
482 </test> 503 </test>
483 <!-- 9) --> 504 <!-- 9) -->
484 <test> 505 <test expect_num_outputs="1">
485 <!-- unsorted.bam should get sorted during "conversion" to bam ... --> 506 <!-- unsorted.bam should get sorted during "conversion" to bam ... -->
486 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> 507 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" />
487 <assert_command> 508 <assert_command>
488 <has_text text="samtools sort" /> 509 <has_text text="samtools sort" />
489 </assert_command> 510 </assert_command>
490 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> 511 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" />
491 </test> 512 </test>
492 <!-- 10) --> 513 <!-- 10) -->
493 <test> 514 <test expect_num_outputs="1">
494 <!-- ... ... but should be emitted unmodified when using input format --> 515 <!-- ... ... but should be emitted unmodified when using input format -->
495 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> 516 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" />
496 <conditional name="mode"> 517 <conditional name="mode">
497 <conditional name="output_options"> 518 <conditional name="output_options">
498 <conditional name="output_format"> 519 <conditional name="output_format">
504 <not_has_text text="samtools sort" /> 525 <not_has_text text="samtools sort" />
505 </assert_command> 526 </assert_command>
506 <output name="outputsam" ftype="unsorted.bam" file="test_8.bam" lines_diff="2" /> 527 <output name="outputsam" ftype="unsorted.bam" file="test_8.bam" lines_diff="2" />
507 </test> 528 </test>
508 <!-- 11) bam to sam + header options (adapted from bam_to_sam tool)--> 529 <!-- 11) bam to sam + header options (adapted from bam_to_sam tool)-->
509 <test> 530 <test expect_num_outputs="1">
510 <param ftype="bam" name="input" value="in_test_11.bam" /> 531 <param ftype="bam" name="input" value="in_test_11.bam" />
511 <conditional name="mode"> 532 <conditional name="mode">
512 <conditional name="output_options"> 533 <conditional name="output_options">
513 <conditional name="output_format"> 534 <conditional name="output_format">
514 <param name="oformat" value="sam" /> 535 <param name="oformat" value="sam" />
517 </conditional> 538 </conditional>
518 </conditional> 539 </conditional>
519 <output file="test_11.sam" ftype="sam" name="outputsam" lines_diff="2" /> 540 <output file="test_11.sam" ftype="sam" name="outputsam" lines_diff="2" />
520 </test> 541 </test>
521 <!-- 12) --> 542 <!-- 12) -->
522 <test> 543 <test expect_num_outputs="1">
523 <param ftype="bam" name="input" value="in_test_11.bam" /> 544 <param ftype="bam" name="input" value="in_test_11.bam" />
524 <conditional name="mode"> 545 <conditional name="mode">
525 <param name="outtype" value="header" /> 546 <param name="outtype" value="header" />
526 <conditional name="output_options"> 547 <conditional name="output_options">
527 <conditional name="output_format"> 548 <conditional name="output_format">
530 </conditional> 551 </conditional>
531 </conditional> 552 </conditional>
532 <output file="test_12.sam" ftype="sam" name="outputsam" lines_diff="2" /> 553 <output file="test_12.sam" ftype="sam" name="outputsam" lines_diff="2" />
533 </test> 554 </test>
534 <!-- 13) --> 555 <!-- 13) -->
535 <test> 556 <test expect_num_outputs="1">
536 <param ftype="bam" name="input" value="in_test_11.bam" /> 557 <param ftype="bam" name="input" value="in_test_11.bam" />
537 <conditional name="mode"> 558 <conditional name="mode">
538 <conditional name="output_options"> 559 <conditional name="output_options">
539 <conditional name="output_format"> 560 <conditional name="output_format">
540 <param name="oformat" value="sam" /> 561 <param name="oformat" value="sam" />
543 </conditional> 564 </conditional>
544 </conditional> 565 </conditional>
545 <output file="test_13.sam" ftype="sam" name="outputsam" lines_diff="2" /> 566 <output file="test_13.sam" ftype="sam" name="outputsam" lines_diff="2" />
546 </test> 567 </test>
547 <!-- 14) count alignments --> 568 <!-- 14) count alignments -->
548 <test> 569 <test expect_num_outputs="1">
549 <param name="input" value="in_test_14.bam" ftype="bam" /> 570 <param name="input" value="in_test_14.bam" ftype="bam" />
550 <conditional name="mode"> 571 <conditional name="mode">
551 <param name="outtype" value="all_reads" /> 572 <param name="outtype" value="all_reads" />
552 <conditional name="output_options"> 573 <conditional name="output_options">
553 <param name="reads_report_type" value="count" /> 574 <param name="reads_report_type" value="count" />
554 </conditional> 575 </conditional>
555 </conditional> 576 </conditional>
556 <output name="outputcnt" file="test_14.tab" ftype="tabular" lines_diff="2" /> 577 <output name="outputcnt" file="test_14.tab" ftype="tabular" lines_diff="2" />
557 </test> 578 </test>
558 <!-- 15) region filters --> 579 <!-- 15) region filters -->
559 <test> 580 <test expect_num_outputs="1">
560 <param name="input" value="in_test_15.sam" ftype="sam" /> 581 <param name="input" value="in_test_15.sam" ftype="sam" />
561 <conditional name="mode"> 582 <conditional name="mode">
562 <param name="outtype" value="selected_reads" /> 583 <param name="outtype" value="selected_reads" />
563 <section name="filter_config"> 584 <section name="filter_config">
564 <conditional name="cond_region"> 585 <conditional name="cond_region">
573 </conditional> 594 </conditional>
574 <conditional name="addref_cond"> 595 <conditional name="addref_cond">
575 <param name="addref_select" value="history" /> 596 <param name="addref_select" value="history" />
576 <param name="ref" value="test.fa" /> 597 <param name="ref" value="test.fa" />
577 </conditional> 598 </conditional>
578 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> 599 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="500" />
579 </test> 600 </test>
580 <!-- 16) --> 601 <!-- 16) -->
581 <test> 602 <test expect_num_outputs="1">
582 <param name="input" value="in_test_14.bam" ftype="bam" /> 603 <param name="input" value="in_test_14.bam" ftype="bam" />
583 <conditional name="mode"> 604 <conditional name="mode">
584 <param name="outtype" value="selected_reads" /> 605 <param name="outtype" value="selected_reads" />
585 <section name="filter_config"> 606 <section name="filter_config">
586 <conditional name="cond_region"> 607 <conditional name="cond_region">
598 <param name="ref" value="test.fa" /> 619 <param name="ref" value="test.fa" />
599 </conditional> 620 </conditional>
600 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> 621 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" />
601 </test> 622 </test>
602 <!-- 17) --> 623 <!-- 17) -->
603 <test> 624 <test expect_num_outputs="1">
604 <param name="input" value="in_test_17.cram" dbkey="equCab2" ftype="cram" /> 625 <param name="input" value="in_test_17.cram" dbkey="equCab2" ftype="cram" />
605 <conditional name="mode"> 626 <conditional name="mode">
606 <param name="outtype" value="selected_reads" /> 627 <param name="outtype" value="selected_reads" />
607 <section name="filter_config"> 628 <section name="filter_config">
608 <conditional name="cond_region"> 629 <conditional name="cond_region">
620 <param name="ref" value="equCab2chrM" /> 641 <param name="ref" value="equCab2chrM" />
621 </conditional> 642 </conditional>
622 <output name="outputsam" file="test_17.bam" ftype="bam" lines_diff="4" /> 643 <output name="outputsam" file="test_17.bam" ftype="bam" lines_diff="4" />
623 </test> 644 </test>
624 <!-- 18) --> 645 <!-- 18) -->
625 <test> 646 <test expect_num_outputs="1">
626 <param name="input" value="in_test_14.bam" ftype="bam" /> 647 <param name="input" value="in_test_14.bam" ftype="bam" />
627 <conditional name="mode"> 648 <conditional name="mode">
628 <param name="outtype" value="selected_reads" /> 649 <param name="outtype" value="selected_reads" />
629 <section name="filter_config"> 650 <section name="filter_config">
630 <conditional name="cond_region"> 651 <conditional name="cond_region">
643 <param name="ref" value="test.fa" /> 664 <param name="ref" value="test.fa" />
644 </conditional> 665 </conditional>
645 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> 666 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" />
646 </test> 667 </test>
647 <!-- 19) --> 668 <!-- 19) -->
648 <test> 669 <test expect_num_outputs="1">
649 <param name="input" value="test_15.cram" ftype="cram" /> 670 <param name="input" value="test_15.cram" ftype="cram" />
650 <conditional name="mode"> 671 <conditional name="mode">
651 <param name="outtype" value="selected_reads" /> 672 <param name="outtype" value="selected_reads" />
652 <section name="filter_config"> 673 <section name="filter_config">
653 <conditional name="cond_region"> 674 <conditional name="cond_region">
666 <param name="ref" value="test.fa" /> 687 <param name="ref" value="test.fa" />
667 </conditional> 688 </conditional>
668 <output name="outputsam" file="test_19.bam" ftype="bam" lines_diff="4"/> 689 <output name="outputsam" file="test_19.bam" ftype="bam" lines_diff="4"/>
669 </test> 690 </test>
670 <!-- 20) --> 691 <!-- 20) -->
671 <test> 692 <test expect_num_outputs="1">
672 <param name="input" value="test_15.cram" ftype="cram" /> 693 <param name="input" value="test_15.cram" ftype="cram" />
673 <conditional name="mode"> 694 <conditional name="mode">
674 <param name="outtype" value="selected_reads" /> 695 <param name="outtype" value="selected_reads" />
675 <section name="filter_config"> 696 <section name="filter_config">
676 <conditional name="cond_region"> 697 <conditional name="cond_region">
689 <param name="ref" value="test.fa" /> 710 <param name="ref" value="test.fa" />
690 </conditional> 711 </conditional>
691 <output name="outputsam" file="test_20.bam" ftype="bam" lines_diff="4" /> 712 <output name="outputsam" file="test_20.bam" ftype="bam" lines_diff="4" />
692 </test> 713 </test>
693 <!-- 21) sampling options target < total reads --> 714 <!-- 21) sampling options target < total reads -->
694 <test> 715 <test expect_num_outputs="1">
695 <param name="input" value="in_test_15.sam" ftype="sam" /> 716 <param name="input" value="in_test_15.sam" ftype="sam" />
696 <conditional name="mode"> 717 <conditional name="mode">
697 <param name="outtype" value="selected_reads" /> 718 <param name="outtype" value="selected_reads" />
698 <section name="subsample_config"> 719 <section name="subsample_config">
699 <conditional name="subsampling_mode"> 720 <conditional name="subsampling_mode">
708 </conditional> 729 </conditional>
709 </conditional> 730 </conditional>
710 <output name="outputsam" file="test_21.sam" ftype="sam" compare="diff" lines_diff="10" /> 731 <output name="outputsam" file="test_21.sam" ftype="sam" compare="diff" lines_diff="10" />
711 </test> 732 </test>
712 <!-- 22) target > total reads --> 733 <!-- 22) target > total reads -->
713 <test> 734 <test expect_num_outputs="1">
714 <param name="input" value="in_test_15.sam" ftype="sam" /> 735 <param name="input" value="in_test_15.sam" ftype="sam" />
715 <conditional name="mode"> 736 <conditional name="mode">
716 <param name="outtype" value="selected_reads" /> 737 <param name="outtype" value="selected_reads" />
717 <section name="subsample_config"> 738 <section name="subsample_config">
718 <conditional name="subsampling_mode"> 739 <conditional name="subsampling_mode">
727 </conditional> 748 </conditional>
728 </conditional> 749 </conditional>
729 <output name="outputsam" file="test_22.sam" ftype="sam" lines_diff="2"/> 750 <output name="outputsam" file="test_22.sam" ftype="sam" lines_diff="2"/>
730 </test> 751 </test>
731 <!-- 23) --> 752 <!-- 23) -->
732 <test> 753 <test expect_num_outputs="1">
733 <!-- subsampling SAM input without reads --> 754 <!-- subsampling SAM input without reads -->
734 <param name="input" value="in_test_23.sam" ftype="sam" /> 755 <param name="input" value="in_test_23.sam" ftype="sam" />
735 <conditional name="mode"> 756 <conditional name="mode">
736 <param name="outtype" value="selected_reads" /> 757 <param name="outtype" value="selected_reads" />
737 <section name="subsample_config"> 758 <section name="subsample_config">
747 </conditional> 768 </conditional>
748 </conditional> 769 </conditional>
749 <output name="outputsam" file="test_23.sam" ftype="sam" lines_diff="2"/> 770 <output name="outputsam" file="test_23.sam" ftype="sam" lines_diff="2"/>
750 </test> 771 </test>
751 <!-- 24) --> 772 <!-- 24) -->
752 <test> 773 <test expect_num_outputs="1">
753 <!-- subsampling BAM input without reads --> 774 <!-- subsampling BAM input without reads -->
754 <param name="input" value="in_test_24.bam" ftype="bam" /> 775 <param name="input" value="in_test_24.bam" ftype="bam" />
755 <conditional name="mode"> 776 <conditional name="mode">
756 <param name="outtype" value="selected_reads" /> 777 <param name="outtype" value="selected_reads" />
757 <section name="subsample_config"> 778 <section name="subsample_config">
767 </conditional> 788 </conditional>
768 </conditional> 789 </conditional>
769 <output name="outputsam" file="test_24.bam" ftype="bam" lines_diff="2" /> 790 <output name="outputsam" file="test_24.bam" ftype="bam" lines_diff="2" />
770 </test> 791 </test>
771 <!-- 25) --> 792 <!-- 25) -->
772 <test> 793 <test expect_num_outputs="1">
773 <param name="input" value="in_test_15.sam" ftype="sam" /> 794 <param name="input" value="in_test_15.sam" ftype="sam" />
774 <conditional name="mode"> 795 <conditional name="mode">
775 <param name="outtype" value="selected_reads" /> 796 <param name="outtype" value="selected_reads" />
776 <section name="subsample_config"> 797 <section name="subsample_config">
777 <conditional name="subsampling_mode"> 798 <conditional name="subsampling_mode">
787 </conditional> 808 </conditional>
788 </conditional> 809 </conditional>
789 <output name="outputsam" file="test_25.sam" ftype="sam" compare="diff" lines_diff="2" /> 810 <output name="outputsam" file="test_25.sam" ftype="sam" compare="diff" lines_diff="2" />
790 </test> 811 </test>
791 <!-- 26) --> 812 <!-- 26) -->
792 <test> 813 <test expect_num_outputs="1">
793 <param name="input" value="in_test_14.bam" ftype="bam" /> 814 <param name="input" value="in_test_14.bam" ftype="bam" />
794 <conditional name="mode"> 815 <conditional name="mode">
795 <param name="outtype" value="selected_reads" /> 816 <param name="outtype" value="selected_reads" />
796 <section name="subsample_config"> 817 <section name="subsample_config">
797 <conditional name="subsampling_mode"> 818 <conditional name="subsampling_mode">
807 </conditional> 828 </conditional>
808 </conditional> 829 </conditional>
809 <output name="outputsam" file="test_26.bam" ftype="bam" lines_diff="2" /> 830 <output name="outputsam" file="test_26.bam" ftype="bam" lines_diff="2" />
810 </test> 831 </test>
811 <!-- 27) --> 832 <!-- 27) -->
812 <test> 833 <test expect_num_outputs="1">
813 <param name="input" value="in_test_14.bam" ftype="bam" /> 834 <param name="input" value="in_test_14.bam" ftype="bam" />
814 <conditional name="mode"> 835 <conditional name="mode">
815 <param name="outtype" value="selected_reads" /> 836 <param name="outtype" value="selected_reads" />
816 <section name="subsample_config"> 837 <section name="subsample_config">
817 <conditional name="subsampling_mode"> 838 <conditional name="subsampling_mode">
827 </conditional> 848 </conditional>
828 </conditional> 849 </conditional>
829 <output name="outputsam" file="test_27.bam" ftype="bam" lines_diff="2"/> 850 <output name="outputsam" file="test_27.bam" ftype="bam" lines_diff="2"/>
830 </test> 851 </test>
831 <!-- 28) --> 852 <!-- 28) -->
832 <test> 853 <test expect_num_outputs="1">
833 <param name="input" value="in_test_14.bam" ftype="bam" /> 854 <param name="input" value="in_test_14.bam" ftype="bam" />
834 <conditional name="mode"> 855 <conditional name="mode">
835 <param name="outtype" value="selected_reads" /> 856 <param name="outtype" value="selected_reads" />
836 <section name="subsample_config"> 857 <section name="subsample_config">
837 <conditional name="subsampling_mode"> 858 <conditional name="subsampling_mode">
847 </conditional> 868 </conditional>
848 </conditional> 869 </conditional>
849 <output name="outputsam" file="test_28.bam" ftype="bam" lines_diff="2" /> 870 <output name="outputsam" file="test_28.bam" ftype="bam" lines_diff="2" />
850 </test> 871 </test>
851 <!-- 29) --> 872 <!-- 29) -->
852 <test> 873 <test expect_num_outputs="1">
853 <param name="input" value="in_test_14.bam" ftype="bam" /> 874 <param name="input" value="in_test_14.bam" ftype="bam" />
854 <conditional name="mode"> 875 <conditional name="mode">
855 <param name="outtype" value="selected_reads" /> 876 <param name="outtype" value="selected_reads" />
856 <section name="subsample_config"> 877 <section name="subsample_config">
857 <conditional name="subsampling_mode"> 878 <conditional name="subsampling_mode">
868 </conditional> 889 </conditional>
869 </conditional> 890 </conditional>
870 <output name="outputsam" file="test_29.bam" ftype="bam" lines_diff="2"/> 891 <output name="outputsam" file="test_29.bam" ftype="bam" lines_diff="2"/>
871 </test> 892 </test>
872 <!-- 30) testing tag filtering --> 893 <!-- 30) testing tag filtering -->
873 <test> 894 <test expect_num_outputs="1">
874 <param name="input" value="in_test_30.bam" ftype="bam" /> 895 <param name="input" value="in_test_30.bam" ftype="bam" />
875 <conditional name="mode"> 896 <conditional name="mode">
876 <param name="outtype" value="selected_reads" /> 897 <param name="outtype" value="selected_reads" />
877 <section name="filter_config"> 898 <section name="filter_config">
878 <param name="tag" value="XS:-18" /> 899 <param name="tag" value="XS:-18" />
887 <has_text text="--tag 'XS:-18'"/> 908 <has_text text="--tag 'XS:-18'"/>
888 </assert_command> 909 </assert_command>
889 <output name="outputsam" file="test_30.bam" ftype="bam" lines_diff="2" /> 910 <output name="outputsam" file="test_30.bam" ftype="bam" lines_diff="2" />
890 </test> 911 </test>
891 <!-- 31) testing readname filtering --> 912 <!-- 31) testing readname filtering -->
892 <test> 913 <test expect_num_outputs="1">
893 <param name="input" value="in_test_30.bam" ftype="bam" /> 914 <param name="input" value="in_test_30.bam" ftype="bam" />
894 <conditional name="mode"> 915 <conditional name="mode">
895 <param name="outtype" value="selected_reads" /> 916 <param name="outtype" value="selected_reads" />
896 <section name="filter_config"> 917 <section name="filter_config">
897 <param name="qname_file" value="readnames.txt" /> 918 <param name="qname_file" value="readnames.txt" />
904 </conditional> 925 </conditional>
905 <assert_command> 926 <assert_command>
906 <has_text text="--qname-file"/> 927 <has_text text="--qname-file"/>
907 </assert_command> 928 </assert_command>
908 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> 929 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" />
930 </test>
931 <!-- 32) testing expression filters -->
932 <test expect_num_outputs="1">
933 <param name="input" value="in_test_30.bam" ftype="bam"/>
934 <conditional name="mode">
935 <param name="outtype" value="selected_reads" />
936 <section name="filter_config">
937 <conditional name="cond_expr">
938 <param name="select_expr" value="yes"/>
939 <param name="expression" value="sclen>0"/>
940 </conditional>
941 </section>
942 <conditional name="output_options">
943 <conditional name="output_format">
944 <param name="oformat" value="bam" />
945 </conditional>
946 </conditional>
947 </conditional>
948 <assert_command>
949 <has_text text="-e 'sclen>0'"/>
950 </assert_command>
951 <output name="outputsam" file="test_32.bam" ftype="bam" lines_diff="2" />
952 </test>
953 <!-- 33) testing expression filters with a quoted string literal -->
954 <test expect_num_outputs="1">
955 <param name="input" value="in_test_30.bam" ftype="bam"/>
956 <conditional name="mode">
957 <param name="outtype" value="selected_reads" />
958 <section name="filter_config">
959 <conditional name="cond_expr">
960 <param name="select_expr" value="yes"/>
961 <param name="expression" value='rname!="chr13"'/>
962 </conditional>
963 </section>
964 <conditional name="output_options">
965 <conditional name="output_format">
966 <param name="oformat" value="bam" />
967 </conditional>
968 </conditional>
969 </conditional>
970 <assert_command>
971 <has_text text="-e 'rname!="/>
972 </assert_command>
973 <output name="outputsam" file="test_33.bam" ftype="bam" lines_diff="2" />
909 </test> 974 </test>
910 </tests> 975 </tests>
911 <help> 976 <help>
912 **What it does** 977 **What it does**
913 978
988 1053
989 **Filtering by quality** 1054 **Filtering by quality**
990 1055
991 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. 1056 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition.
992 1057
993 ## Filtering by Tag ** 1058 **Filtering by Tag**
994 1059
995 This filter allows selecting reads based on tool- or user-specific tags, e.g., XS:i:-18, the alignment score tag of bowtie. 1060 This filter allows selecting reads based on tool- or user-specific tags, e.g., XS:i:-18, the alignment score tag of bowtie.
996 Thus, to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. 1061 Thus, to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18.
997 You can also write just STR1 without the value STR2; the filter then selects all reads that carry the tag STR1, e.g., XS. 1062 You can also write just STR1 without the value STR2; the filter then selects all reads that carry the tag STR1, e.g., XS.
998 1063
1064 **Filtering by Expression**
1065
1066
1067 Filter expressions are used as an on-the-fly checking of incoming SAM, BAM or CRAM records, discarding records that do not match the specified expression.
1068
1069 The language used is primarily C style, but with a few differences in the precedence rules for bit operators and the inclusion of regular expression
1070 matching.
1071
1072 The operator precedence, from strongest binding to weakest, is
1073
1074 ::
1075
1076 Grouping (, ) E.g. &quot;(1+2)&#42;3&quot;
1077 Values: literals, vars Numbers, strings and variables
1078 Unary ops: +, -, !, ~ E.g. -10 +10, !10 (not), ~5 (bit not)
1079 Math ops: *, /, % Multiply, division and (integer) modulo
1080 Math ops: +, - Addition / subtraction
1081 Bit-wise: &amp; Integer AND
1082 Bit-wise ^ Integer XOR
1083 Bit-wise | Integer OR
1084 Conditionals: &gt;, &gt;=, &lt;, &lt;=
1085 Equality: ==, !=, =~, !~ =~ and !~ match regular expressions
1086 Boolean: &amp;&amp;, || Logical AND / OR
1087
1088
1089 Expressions are computed using floating point mathematics, so &quot;10 / 4&quot; evaluates to 2.5 rather than 2. They may be written as integers in decimal or
1090 &quot;0x&quot; plus hexadecimal, and floating point with or without exponents. However, operations that require integers first do an implicit type conversion, so
1091 &quot;7.9 % 5&quot; is 2 and &quot;7.9 &amp; 4.1&quot; is equivalent to &quot;7 &amp; 4&quot;, which is 4. Strings are always specified using double quotes. To get a double quote in a
1092 string, use backslash. Similarly a double backslash is used to get a literal backslash. For example ``ab\&quot;c\\d`` is the string ``ab&quot;c\d``.
1093
1094 Comparison operators are evaluated as a match being 1 and a mismatch being 0, thus &quot;(2 &gt; 1) + (3 &lt; 5)&quot; evaluates as 2. All comparisons involving undefined (null) values are deemed to be false.
1095
1096 The variables are where the file format specifics are accessed from the expression. The variables correspond to SAM fields, for example to find paired
1097 alignments with high mapping quality and a very large insert size, we may use the expression &quot;mapq &gt;= 30 &amp;&amp; (tlen &gt;= 100000 || tlen &lt;= -100000)&quot;. Valid
1098 variable names and their data types are:
1099
1100 ::
1101
1102 endpos int Alignment end position (1-based)
1103 flag int Combined FLAG field
1104 flag.paired int Single bit, 0 or 1
1105 flag.proper_pair int Single bit, 0 or 2
1106 flag.unmap int Single bit, 0 or 4
1107 flag.munmap int Single bit, 0 or 8
1108 flag.reverse int Single bit, 0 or 16
1109 flag.mreverse int Single bit, 0 or 32
1110 flag.read1 int Single bit, 0 or 64
1111 flag.read2 int Single bit, 0 or 128
1112 flag.secondary int Single bit, 0 or 256
1113 flag.qcfail int Single bit, 0 or 512
1114 flag.dup int Single bit, 0 or 1024
1115 flag.supplementary int Single bit, 0 or 2048
1116 hclen int Number of hard-clipped bases
1117 library string Library (LB header via RG)
1118 mapq int Mapping quality
1119 mpos int Synonym for pnext
1120 mrefid int Mate reference number (0 based)
1121 mrname string Synonym for rnext
1122 ncigar int Number of cigar operations
1123 pnext int Mate's alignment position (1-based)
1124 pos int Alignment position (1-based)
1125 qlen int Alignment length: no. query bases
1126 qname string Query name
1127 qual string Quality values (raw, 0 based)
1128 refid int Integer reference number (0 based)
1129 rlen int Alignment length: no. reference bases
1130 rname string Reference name
1131 rnext string Mate's reference name
1132 sclen int Number of soft-clipped bases
1133 seq string Sequence
1134 tlen int Template length (insert size)
1135 [XX] int / string XX tag value
1136
1137
1138 Flags are returned either as the whole flag value or by checking for a single bit. Hence the filter expression flag.dup is equivalent to flag &amp; 1024.
1139
1140 &quot;qlen&quot; and &quot;rlen&quot; are measured using the CIGAR string to count the number of query (sequence) and reference bases consumed. Note &quot;qlen&quot; may not exactly
1141 match the length of the &quot;seq&quot; field if the sequence is &quot;&#42;&quot;.
1142
1143 &quot;sclen&quot; and &quot;hclen&quot; are the number of soft and hard-clipped bases respectively. The formula &quot;qlen-sclen&quot; gives the number of sequence bases used in the
1144 alignment, distinguishing between global alignment and local alignment length.
1145
1146 &quot;endpos&quot; is the (1-based inclusive) position of the rightmost mapped base of the read, as measured using the CIGAR string, and for mapped reads is
1147 equivalent to &quot;pos+rlen-1&quot;. For unmapped reads, it is the same as &quot;pos&quot;.
1148
1149 Reference names may be matched either by their string forms (&quot;rname&quot; and &quot;mrname&quot;) or as the Nth @SQ line (counting from zero) as stored in BAM using
1150 &quot;refid&quot; and &quot;mrefid&quot; respectively.
1151
1152 Auxiliary tags are described in square brackets and these expand to either integer or string as defined by the tag itself (XX:Z:string or XX:i:int).
1153 For example [NM]&gt;=10 can be used to look for alignments with many mismatches and [RG]=~&quot;grp[ABC]-&quot; will match the read-group string.
1154
1155 If no comparison is used with an auxiliary tag it is taken simply to be a test for the existence of that tag. So [NM] will return any record containing
1156 an NM tag, even if that tag is zero (NM:i:0). In htslib &lt;= 1.15 negating this with ![NM] gave misleading results as it was true if the tag did not exist
1157 or did exist but was zero. Now this is strictly does-not-exist. An explicit exists([NM]) and !exists([NM]) function has also been added to make
1158 this intention clear.
1159
1160 Similarly in htslib &lt;= 1.15 using [NM]!=0 was true both when the tag existed and was not zero as well as when the tag did not exist. From 1.16 onwards
1161 all comparison operators are only true for tags that exist, so [NM]!=0 works as expected.
1162
1163 Some simple functions are available to operate on strings. These treat the strings as arrays of bytes, permitting their length, minimum, maximum and
1164 average values to be computed. These are useful for processing Quality Scores.
1165
1166 ::
1167
1168 length(x) Length of the string (excluding nul char)
1169 min(x) Minimum byte value in the string
1170 max(x) Maximum byte value in the string
1171 avg(x) Average byte value in the string
1172
1173
1174 Note that &quot;avg&quot; is a floating point value and it may be NAN for empty strings. This means that &quot;avg(qual)&quot; does not produce an error for records that
1175 have both seq and qual of &quot;&#42;&quot;. NAN values will fail any conditional checks, so e.g. &quot;avg(qual) &gt; 20&quot; works and will not report these records. NAN also
1176 fails all equality, &lt; and &gt; comparisons, and returns zero when given as an argument to the exists function. It can be negated with !x in which case it
1177 becomes true.
1178
1179 Functions that operate on both strings and numerics:
1180
1181 ::
1182
1183 exists(x) True if the value exists (or is explicitly true).
1184 default(x,d) Value x if it exists or d if not.
1185
1186 Functions that apply only to numeric values:
1187
1188 ::
1189
1190 sqrt(x) Square root of x
1191 log(x) Natural logarithm of x
1192 pow(x, y) Power function, x to the power of y
1193 exp(x) Base-e exponential, equivalent to pow(e,x)
1194
999 </help> 1195 </help>
1000 <expand macro="citations"/> 1196 <expand macro="citations"/>
1001 </tool> 1197 </tool>