Mercurial > repos > iuc > samtools_view
comparison samtools_view.xml @ 15:e41d3ce2ab9f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit e3de8bc1123bf4ce56818f2b7ad4b53080cb3bd8
| author | iuc |
|---|---|
| date | Fri, 30 Aug 2024 10:24:13 +0000 |
| parents | e63aab0f18c6 |
| children | 17c2bd677389 |
comparison
equal
deleted
inserted
replaced
| 14:e63aab0f18c6 | 15:e41d3ce2ab9f |
|---|---|
| 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> | 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@"> |
| 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> | 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 <token name="@REF_DATA@"> | 5 <token name="@REF_DATA@"> |
| 6 ## additional reference data | 6 ## additional reference data |
| 134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag | 134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag |
| 135 #end if | 135 #end if |
| 136 #if $mode.filter_config.qname_file: | 136 #if $mode.filter_config.qname_file: |
| 137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file | 137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file |
| 138 #end if | 138 #end if |
| 139 #if str($cond_expr.select_expr) == "yes": | |
| 140 #set std_filters = $std_filters + " -e '%s'" % $cond_expr.expression | |
| 141 #end if | |
| 139 #end if | 142 #end if |
| 140 | 143 |
| 141 #if $with_subsampling: | 144 #if $with_subsampling: |
| 142 ## handle seed and fraction calculation for subsampling | 145 ## handle seed and fraction calculation for subsampling |
| 143 #import random | 146 #import random |
| 152 #if $input.is_of_type('sam') or $std_filters or $reg_filters: | 155 #if $input.is_of_type('sam') or $std_filters or $reg_filters: |
| 153 ## There is no index or we cannot use it because we are | 156 ## There is no index or we cannot use it because we are |
| 154 ## not dealing with all of the reads in the indexed | 157 ## not dealing with all of the reads in the indexed |
| 155 ## file. We have to do an extra pass over the input to | 158 ## file. We have to do an extra pass over the input to |
| 156 ## count the reads to subsample. | 159 ## count the reads to subsample. |
| 157 sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` && | 160 sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {fac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", fac > 1 ? 1/fac : 1)}'` && |
| 158 #else: | 161 #else: |
| 159 ## We can get the count of reads to subsample using | 162 ## We can get the count of reads to subsample using |
| 160 ## an inexpensive call to idxstats. | 163 ## an inexpensive call to idxstats. |
| 161 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` && | 164 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {fac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", fac > 1 ? 1/fac : 1)}'` && |
| 162 #end if | 165 #end if |
| 163 #end if | 166 #end if |
| 164 #end if | 167 #end if |
| 165 | 168 |
| 166 ## call samtools view | 169 ## call samtools view |
| 168 -@ \$addthreads | 171 -@ \$addthreads |
| 169 $fmtopt | 172 $fmtopt |
| 170 | 173 |
| 171 ## filter options (except regions filter, which is the last parameter) | 174 ## filter options (except regions filter, which is the last parameter) |
| 172 $std_filters | 175 $std_filters |
| 173 | |
| 174 #if $with_subsampling: | 176 #if $with_subsampling: |
| 177 --subsample-seed $seed | |
| 175 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": | 178 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": |
| 176 ##this is calculated at execution time before the main samtools command | 179 ##this is calculated at execution time before the main samtools command |
| 177 -s \${sample_fragment} | 180 --subsample \${sample_fragment} |
| 178 #else: | 181 #else: |
| 179 #set $fraction = $seed + 1 / float($mode.subsample_config.subsampling_mode.factor) | 182 #set $fraction = 1 / float($mode.subsample_config.subsampling_mode.factor) |
| 180 -s $fraction | 183 --subsample $fraction |
| 181 #end if | 184 #end if |
| 182 #end if | 185 #end if |
| 183 | 186 |
| 184 ## output options | 187 ## output options |
| 185 #if str($mode.output_options.reads_report_type) == 'count': | 188 #if str($mode.output_options.reads_report_type) == 'count': |
| 295 <when value="text"> | 298 <when value="text"> |
| 296 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> | 299 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> |
| 297 </when> | 300 </when> |
| 298 <when value="file"> | 301 <when value="file"> |
| 299 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> | 302 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> |
| 303 </when> | |
| 304 </conditional> | |
| 305 <conditional name="cond_expr"> | |
| 306 <param name="select_expr" type="select" label="Filter by expression"> | |
| 307 <option value="no" selected="True">No</option> | |
| 308 <option value="yes">Filter using an expression (see manual)</option> | |
| 309 </param> | |
| 310 <when value="no"/> | |
| 311 <when value="yes"> | |
| 312 <param name="expression" type="text" argument="-e" label="Filter by expression - for example sclen>0 will filter all soft clipped reads" help="See Samtools manual for Filter expression syntax"> | |
| 313 <sanitizer invalid_char=""> | |
| 314 <valid initial="string.printable"> | |
| 315 <remove value=" "/> | |
| 316 <remove value="'"/> | |
| 317 <remove value='"'/> | |
| 318 </valid> | |
| 319 </sanitizer> | |
| 320 </param> | |
| 300 </when> | 321 </when> |
| 301 </conditional> | 322 </conditional> |
| 302 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> | 323 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> |
| 303 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> | 324 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> |
| 304 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> | 325 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> |
| 396 <data name="outputcnt" format="tabular" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts"> | 417 <data name="outputcnt" format="tabular" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts"> |
| 397 <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter> | 418 <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter> |
| 398 </data> | 419 </data> |
| 399 </outputs> | 420 </outputs> |
| 400 <tests> | 421 <tests> |
| 401 <!-- 1) sam to bam (copied from the sam_to_bam tool) --> | 422 <!-- 1) sam to bam (copied from the sam_to_bam tool) --> |
| 402 <test> | 423 <test expect_num_outputs="1"> |
| 403 <param name="input" ftype="sam" value="in_test_1.sam" /> | 424 <param name="input" ftype="sam" value="in_test_1.sam" /> |
| 404 <output name="outputsam" ftype="bam" file="test_1.bam" lines_diff="4" /> | 425 <output name="outputsam" ftype="bam" file="test_1.bam" lines_diff="4" /> |
| 405 </test> | 426 </test> |
| 406 <!-- 2) --> | 427 <!-- 2) --> |
| 407 <test> | 428 <test expect_num_outputs="1"> |
| 408 <param name="input" ftype="sam" dbkey="equCab2" value="in_test_1.sam" /> | 429 <param name="input" ftype="sam" dbkey="equCab2" value="in_test_1.sam" /> |
| 409 <conditional name="addref_cond"> | 430 <conditional name="addref_cond"> |
| 410 <param name="addref_select" value="cached" /> | 431 <param name="addref_select" value="cached" /> |
| 411 <param name="ref" value="equCab2chrM" /> | 432 <param name="ref" value="equCab2chrM" /> |
| 412 </conditional> | 433 </conditional> |
| 413 <output name="outputsam" ftype="bam" file="test_2.bam" lines_diff="4" /> | 434 <output name="outputsam" ftype="bam" file="test_2.bam" lines_diff="4" /> |
| 414 </test> | 435 </test> |
| 415 <!-- 3) --> | 436 <!-- 3) --> |
| 416 <test> | 437 <test expect_num_outputs="1"> |
| 417 <param name="input" ftype="sam" value="in_test_3.sam" /> | 438 <param name="input" ftype="sam" value="in_test_3.sam" /> |
| 418 <conditional name="addref_cond"> | 439 <conditional name="addref_cond"> |
| 419 <param name="addref_select" value="history" /> | 440 <param name="addref_select" value="history" /> |
| 420 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" /> | 441 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" /> |
| 421 </conditional> | 442 </conditional> |
| 422 <output name="outputsam" ftype="bam" file="test_3.bam" lines_diff="4" /> | 443 <output name="outputsam" ftype="bam" file="test_3.bam" lines_diff="4" /> |
| 423 </test> | 444 </test> |
| 424 <!-- 4) cram to bam --> | 445 <!-- 4) cram to bam --> |
| 425 <test> | 446 <test expect_num_outputs="1"> |
| 426 <param name="input" value="in_test_4.cram" ftype="cram" /> | 447 <param name="input" value="in_test_4.cram" ftype="cram" /> |
| 427 <conditional name="addref_cond"> | 448 <conditional name="addref_cond"> |
| 428 <param name="addref_select" value="history" /> | 449 <param name="addref_select" value="history" /> |
| 429 <param name="ref" value="test.fa" /> | 450 <param name="ref" value="test.fa" /> |
| 430 </conditional> | 451 </conditional> |
| 431 <output name="outputsam" file="test_4.bam" ftype="bam" lines_diff="4" /> | 452 <output name="outputsam" file="test_4.bam" ftype="bam" lines_diff="4" /> |
| 432 </test> | 453 </test> |
| 433 <!-- 5) within bam operations expected to result in sorting or not --> | 454 <!-- 5) within bam operations expected to result in sorting or not --> |
| 434 <test > | 455 <test expect_num_outputs="1"> |
| 435 <!-- sorted bam should always result in unmodified output --> | 456 <!-- sorted bam should always result in unmodified output --> |
| 436 <param name="input" ftype="bam" value="in_test_5.bam" /> | 457 <param name="input" ftype="bam" value="in_test_5.bam" /> |
| 437 <assert_command> | 458 <assert_command> |
| 438 <not_has_text text="samtools sort" /> | 459 <not_has_text text="samtools sort" /> |
| 439 </assert_command> | 460 </assert_command> |
| 440 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> | 461 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> |
| 441 </test> | 462 </test> |
| 442 <!-- 6) --> | 463 <!-- 6) --> |
| 443 <test> | 464 <test expect_num_outputs="1"> |
| 444 <!-- sorted bam should always result in unmodified output --> | 465 <!-- sorted bam should always result in unmodified output --> |
| 445 <param name="input" ftype="bam" value="in_test_5.bam" /> | 466 <param name="input" ftype="bam" value="in_test_5.bam" /> |
| 446 <conditional name="mode"> | 467 <conditional name="mode"> |
| 447 <conditional name="output_options"> | 468 <conditional name="output_options"> |
| 448 <conditional name="output_format"> | 469 <conditional name="output_format"> |
| 454 <not_has_text text="samtools sort" /> | 475 <not_has_text text="samtools sort" /> |
| 455 </assert_command> | 476 </assert_command> |
| 456 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> | 477 <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/> |
| 457 </test> | 478 </test> |
| 458 <!-- 7) --> | 479 <!-- 7) --> |
| 459 <test> | 480 <test expect_num_outputs="1"> |
| 460 <!-- qname_sorted.bam should get sorted during "conversion" to bam ... --> | 481 <!-- qname_sorted.bam should get sorted during "conversion" to bam ... --> |
| 461 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> | 482 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> |
| 462 <assert_command> | 483 <assert_command> |
| 463 <has_text text="samtools sort" /> | 484 <has_text text="samtools sort" /> |
| 464 </assert_command> | 485 </assert_command> |
| 465 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> | 486 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> |
| 466 </test> | 487 </test> |
| 467 <!-- 8) --> | 488 <!-- 8) --> |
| 468 <test> | 489 <test expect_num_outputs="1"> |
| 469 <!-- ... but should be emitted unmodified when using input format --> | 490 <!-- ... but should be emitted unmodified when using input format --> |
| 470 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> | 491 <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" /> |
| 471 <conditional name="mode"> | 492 <conditional name="mode"> |
| 472 <conditional name="output_options"> | 493 <conditional name="output_options"> |
| 473 <conditional name="output_format"> | 494 <conditional name="output_format"> |
| 479 <not_has_text text="samtools sort" /> | 500 <not_has_text text="samtools sort" /> |
| 480 </assert_command> | 501 </assert_command> |
| 481 <output name="outputsam" ftype="qname_sorted.bam" file="test_8.bam" lines_diff="2"/> | 502 <output name="outputsam" ftype="qname_sorted.bam" file="test_8.bam" lines_diff="2"/> |
| 482 </test> | 503 </test> |
| 483 <!-- 9) --> | 504 <!-- 9) --> |
| 484 <test> | 505 <test expect_num_outputs="1"> |
| 485 <!-- unsorted.bam should get sorted during "conversion" to bam ... --> | 506 <!-- unsorted.bam should get sorted during "conversion" to bam ... --> |
| 486 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> | 507 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> |
| 487 <assert_command> | 508 <assert_command> |
| 488 <has_text text="samtools sort" /> | 509 <has_text text="samtools sort" /> |
| 489 </assert_command> | 510 </assert_command> |
| 490 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> | 511 <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" /> |
| 491 </test> | 512 </test> |
| 492 <!-- 10) --> | 513 <!-- 10) --> |
| 493 <test> | 514 <test expect_num_outputs="1"> |
| 494 <!-- ... ... but should be emitted unmodified when using input format --> | 515 <!-- ... ... but should be emitted unmodified when using input format --> |
| 495 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> | 516 <param name="input" ftype="unsorted.bam" value="in_test_7.bam" /> |
| 496 <conditional name="mode"> | 517 <conditional name="mode"> |
| 497 <conditional name="output_options"> | 518 <conditional name="output_options"> |
| 498 <conditional name="output_format"> | 519 <conditional name="output_format"> |
| 504 <not_has_text text="samtools sort" /> | 525 <not_has_text text="samtools sort" /> |
| 505 </assert_command> | 526 </assert_command> |
| 506 <output name="outputsam" ftype="unsorted.bam" file="test_8.bam" lines_diff="2" /> | 527 <output name="outputsam" ftype="unsorted.bam" file="test_8.bam" lines_diff="2" /> |
| 507 </test> | 528 </test> |
| 508 <!-- 11) bam to sam + header options (adapted from bam_to_sam tool)--> | 529 <!-- 11) bam to sam + header options (adapted from bam_to_sam tool)--> |
| 509 <test> | 530 <test expect_num_outputs="1"> |
| 510 <param ftype="bam" name="input" value="in_test_11.bam" /> | 531 <param ftype="bam" name="input" value="in_test_11.bam" /> |
| 511 <conditional name="mode"> | 532 <conditional name="mode"> |
| 512 <conditional name="output_options"> | 533 <conditional name="output_options"> |
| 513 <conditional name="output_format"> | 534 <conditional name="output_format"> |
| 514 <param name="oformat" value="sam" /> | 535 <param name="oformat" value="sam" /> |
| 517 </conditional> | 538 </conditional> |
| 518 </conditional> | 539 </conditional> |
| 519 <output file="test_11.sam" ftype="sam" name="outputsam" lines_diff="2" /> | 540 <output file="test_11.sam" ftype="sam" name="outputsam" lines_diff="2" /> |
| 520 </test> | 541 </test> |
| 521 <!-- 12) --> | 542 <!-- 12) --> |
| 522 <test> | 543 <test expect_num_outputs="1"> |
| 523 <param ftype="bam" name="input" value="in_test_11.bam" /> | 544 <param ftype="bam" name="input" value="in_test_11.bam" /> |
| 524 <conditional name="mode"> | 545 <conditional name="mode"> |
| 525 <param name="outtype" value="header" /> | 546 <param name="outtype" value="header" /> |
| 526 <conditional name="output_options"> | 547 <conditional name="output_options"> |
| 527 <conditional name="output_format"> | 548 <conditional name="output_format"> |
| 530 </conditional> | 551 </conditional> |
| 531 </conditional> | 552 </conditional> |
| 532 <output file="test_12.sam" ftype="sam" name="outputsam" lines_diff="2" /> | 553 <output file="test_12.sam" ftype="sam" name="outputsam" lines_diff="2" /> |
| 533 </test> | 554 </test> |
| 534 <!-- 13) --> | 555 <!-- 13) --> |
| 535 <test> | 556 <test expect_num_outputs="1"> |
| 536 <param ftype="bam" name="input" value="in_test_11.bam" /> | 557 <param ftype="bam" name="input" value="in_test_11.bam" /> |
| 537 <conditional name="mode"> | 558 <conditional name="mode"> |
| 538 <conditional name="output_options"> | 559 <conditional name="output_options"> |
| 539 <conditional name="output_format"> | 560 <conditional name="output_format"> |
| 540 <param name="oformat" value="sam" /> | 561 <param name="oformat" value="sam" /> |
| 543 </conditional> | 564 </conditional> |
| 544 </conditional> | 565 </conditional> |
| 545 <output file="test_13.sam" ftype="sam" name="outputsam" lines_diff="2" /> | 566 <output file="test_13.sam" ftype="sam" name="outputsam" lines_diff="2" /> |
| 546 </test> | 567 </test> |
| 547 <!-- 14) count alignments --> | 568 <!-- 14) count alignments --> |
| 548 <test> | 569 <test expect_num_outputs="1"> |
| 549 <param name="input" value="in_test_14.bam" ftype="bam" /> | 570 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 550 <conditional name="mode"> | 571 <conditional name="mode"> |
| 551 <param name="outtype" value="all_reads" /> | 572 <param name="outtype" value="all_reads" /> |
| 552 <conditional name="output_options"> | 573 <conditional name="output_options"> |
| 553 <param name="reads_report_type" value="count" /> | 574 <param name="reads_report_type" value="count" /> |
| 554 </conditional> | 575 </conditional> |
| 555 </conditional> | 576 </conditional> |
| 556 <output name="outputcnt" file="test_14.tab" ftype="tabular" lines_diff="2" /> | 577 <output name="outputcnt" file="test_14.tab" ftype="tabular" lines_diff="2" /> |
| 557 </test> | 578 </test> |
| 558 <!-- 15) region filters --> | 579 <!-- 15) region filters --> |
| 559 <test> | 580 <test expect_num_outputs="1"> |
| 560 <param name="input" value="in_test_15.sam" ftype="sam" /> | 581 <param name="input" value="in_test_15.sam" ftype="sam" /> |
| 561 <conditional name="mode"> | 582 <conditional name="mode"> |
| 562 <param name="outtype" value="selected_reads" /> | 583 <param name="outtype" value="selected_reads" /> |
| 563 <section name="filter_config"> | 584 <section name="filter_config"> |
| 564 <conditional name="cond_region"> | 585 <conditional name="cond_region"> |
| 573 </conditional> | 594 </conditional> |
| 574 <conditional name="addref_cond"> | 595 <conditional name="addref_cond"> |
| 575 <param name="addref_select" value="history" /> | 596 <param name="addref_select" value="history" /> |
| 576 <param name="ref" value="test.fa" /> | 597 <param name="ref" value="test.fa" /> |
| 577 </conditional> | 598 </conditional> |
| 578 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> | 599 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="500" /> |
| 579 </test> | 600 </test> |
| 580 <!-- 16) --> | 601 <!-- 16) --> |
| 581 <test> | 602 <test expect_num_outputs="1"> |
| 582 <param name="input" value="in_test_14.bam" ftype="bam" /> | 603 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 583 <conditional name="mode"> | 604 <conditional name="mode"> |
| 584 <param name="outtype" value="selected_reads" /> | 605 <param name="outtype" value="selected_reads" /> |
| 585 <section name="filter_config"> | 606 <section name="filter_config"> |
| 586 <conditional name="cond_region"> | 607 <conditional name="cond_region"> |
| 598 <param name="ref" value="test.fa" /> | 619 <param name="ref" value="test.fa" /> |
| 599 </conditional> | 620 </conditional> |
| 600 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> | 621 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> |
| 601 </test> | 622 </test> |
| 602 <!-- 17) --> | 623 <!-- 17) --> |
| 603 <test> | 624 <test expect_num_outputs="1"> |
| 604 <param name="input" value="in_test_17.cram" dbkey="equCab2" ftype="cram" /> | 625 <param name="input" value="in_test_17.cram" dbkey="equCab2" ftype="cram" /> |
| 605 <conditional name="mode"> | 626 <conditional name="mode"> |
| 606 <param name="outtype" value="selected_reads" /> | 627 <param name="outtype" value="selected_reads" /> |
| 607 <section name="filter_config"> | 628 <section name="filter_config"> |
| 608 <conditional name="cond_region"> | 629 <conditional name="cond_region"> |
| 620 <param name="ref" value="equCab2chrM" /> | 641 <param name="ref" value="equCab2chrM" /> |
| 621 </conditional> | 642 </conditional> |
| 622 <output name="outputsam" file="test_17.bam" ftype="bam" lines_diff="4" /> | 643 <output name="outputsam" file="test_17.bam" ftype="bam" lines_diff="4" /> |
| 623 </test> | 644 </test> |
| 624 <!-- 18) --> | 645 <!-- 18) --> |
| 625 <test> | 646 <test expect_num_outputs="1"> |
| 626 <param name="input" value="in_test_14.bam" ftype="bam" /> | 647 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 627 <conditional name="mode"> | 648 <conditional name="mode"> |
| 628 <param name="outtype" value="selected_reads" /> | 649 <param name="outtype" value="selected_reads" /> |
| 629 <section name="filter_config"> | 650 <section name="filter_config"> |
| 630 <conditional name="cond_region"> | 651 <conditional name="cond_region"> |
| 643 <param name="ref" value="test.fa" /> | 664 <param name="ref" value="test.fa" /> |
| 644 </conditional> | 665 </conditional> |
| 645 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> | 666 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> |
| 646 </test> | 667 </test> |
| 647 <!-- 19) --> | 668 <!-- 19) --> |
| 648 <test> | 669 <test expect_num_outputs="1"> |
| 649 <param name="input" value="test_15.cram" ftype="cram" /> | 670 <param name="input" value="test_15.cram" ftype="cram" /> |
| 650 <conditional name="mode"> | 671 <conditional name="mode"> |
| 651 <param name="outtype" value="selected_reads" /> | 672 <param name="outtype" value="selected_reads" /> |
| 652 <section name="filter_config"> | 673 <section name="filter_config"> |
| 653 <conditional name="cond_region"> | 674 <conditional name="cond_region"> |
| 666 <param name="ref" value="test.fa" /> | 687 <param name="ref" value="test.fa" /> |
| 667 </conditional> | 688 </conditional> |
| 668 <output name="outputsam" file="test_19.bam" ftype="bam" lines_diff="4"/> | 689 <output name="outputsam" file="test_19.bam" ftype="bam" lines_diff="4"/> |
| 669 </test> | 690 </test> |
| 670 <!-- 20) --> | 691 <!-- 20) --> |
| 671 <test> | 692 <test expect_num_outputs="1"> |
| 672 <param name="input" value="test_15.cram" ftype="cram" /> | 693 <param name="input" value="test_15.cram" ftype="cram" /> |
| 673 <conditional name="mode"> | 694 <conditional name="mode"> |
| 674 <param name="outtype" value="selected_reads" /> | 695 <param name="outtype" value="selected_reads" /> |
| 675 <section name="filter_config"> | 696 <section name="filter_config"> |
| 676 <conditional name="cond_region"> | 697 <conditional name="cond_region"> |
| 689 <param name="ref" value="test.fa" /> | 710 <param name="ref" value="test.fa" /> |
| 690 </conditional> | 711 </conditional> |
| 691 <output name="outputsam" file="test_20.bam" ftype="bam" lines_diff="4" /> | 712 <output name="outputsam" file="test_20.bam" ftype="bam" lines_diff="4" /> |
| 692 </test> | 713 </test> |
| 693 <!-- 21) sampling options target < total reads --> | 714 <!-- 21) sampling options target < total reads --> |
| 694 <test> | 715 <test expect_num_outputs="1"> |
| 695 <param name="input" value="in_test_15.sam" ftype="sam" /> | 716 <param name="input" value="in_test_15.sam" ftype="sam" /> |
| 696 <conditional name="mode"> | 717 <conditional name="mode"> |
| 697 <param name="outtype" value="selected_reads" /> | 718 <param name="outtype" value="selected_reads" /> |
| 698 <section name="subsample_config"> | 719 <section name="subsample_config"> |
| 699 <conditional name="subsampling_mode"> | 720 <conditional name="subsampling_mode"> |
| 708 </conditional> | 729 </conditional> |
| 709 </conditional> | 730 </conditional> |
| 710 <output name="outputsam" file="test_21.sam" ftype="sam" compare="diff" lines_diff="10" /> | 731 <output name="outputsam" file="test_21.sam" ftype="sam" compare="diff" lines_diff="10" /> |
| 711 </test> | 732 </test> |
| 712 <!-- 22) target > total reads --> | 733 <!-- 22) target > total reads --> |
| 713 <test> | 734 <test expect_num_outputs="1"> |
| 714 <param name="input" value="in_test_15.sam" ftype="sam" /> | 735 <param name="input" value="in_test_15.sam" ftype="sam" /> |
| 715 <conditional name="mode"> | 736 <conditional name="mode"> |
| 716 <param name="outtype" value="selected_reads" /> | 737 <param name="outtype" value="selected_reads" /> |
| 717 <section name="subsample_config"> | 738 <section name="subsample_config"> |
| 718 <conditional name="subsampling_mode"> | 739 <conditional name="subsampling_mode"> |
| 727 </conditional> | 748 </conditional> |
| 728 </conditional> | 749 </conditional> |
| 729 <output name="outputsam" file="test_22.sam" ftype="sam" lines_diff="2"/> | 750 <output name="outputsam" file="test_22.sam" ftype="sam" lines_diff="2"/> |
| 730 </test> | 751 </test> |
| 731 <!-- 23) --> | 752 <!-- 23) --> |
| 732 <test> | 753 <test expect_num_outputs="1"> |
| 733 <!-- subsampling SAM input without reads --> | 754 <!-- subsampling SAM input without reads --> |
| 734 <param name="input" value="in_test_23.sam" ftype="sam" /> | 755 <param name="input" value="in_test_23.sam" ftype="sam" /> |
| 735 <conditional name="mode"> | 756 <conditional name="mode"> |
| 736 <param name="outtype" value="selected_reads" /> | 757 <param name="outtype" value="selected_reads" /> |
| 737 <section name="subsample_config"> | 758 <section name="subsample_config"> |
| 747 </conditional> | 768 </conditional> |
| 748 </conditional> | 769 </conditional> |
| 749 <output name="outputsam" file="test_23.sam" ftype="sam" lines_diff="2"/> | 770 <output name="outputsam" file="test_23.sam" ftype="sam" lines_diff="2"/> |
| 750 </test> | 771 </test> |
| 751 <!-- 24) --> | 772 <!-- 24) --> |
| 752 <test> | 773 <test expect_num_outputs="1"> |
| 753 <!-- subsampling BAM input without reads --> | 774 <!-- subsampling BAM input without reads --> |
| 754 <param name="input" value="in_test_24.bam" ftype="bam" /> | 775 <param name="input" value="in_test_24.bam" ftype="bam" /> |
| 755 <conditional name="mode"> | 776 <conditional name="mode"> |
| 756 <param name="outtype" value="selected_reads" /> | 777 <param name="outtype" value="selected_reads" /> |
| 757 <section name="subsample_config"> | 778 <section name="subsample_config"> |
| 767 </conditional> | 788 </conditional> |
| 768 </conditional> | 789 </conditional> |
| 769 <output name="outputsam" file="test_24.bam" ftype="bam" lines_diff="2" /> | 790 <output name="outputsam" file="test_24.bam" ftype="bam" lines_diff="2" /> |
| 770 </test> | 791 </test> |
| 771 <!-- 25) --> | 792 <!-- 25) --> |
| 772 <test> | 793 <test expect_num_outputs="1"> |
| 773 <param name="input" value="in_test_15.sam" ftype="sam" /> | 794 <param name="input" value="in_test_15.sam" ftype="sam" /> |
| 774 <conditional name="mode"> | 795 <conditional name="mode"> |
| 775 <param name="outtype" value="selected_reads" /> | 796 <param name="outtype" value="selected_reads" /> |
| 776 <section name="subsample_config"> | 797 <section name="subsample_config"> |
| 777 <conditional name="subsampling_mode"> | 798 <conditional name="subsampling_mode"> |
| 787 </conditional> | 808 </conditional> |
| 788 </conditional> | 809 </conditional> |
| 789 <output name="outputsam" file="test_25.sam" ftype="sam" compare="diff" lines_diff="2" /> | 810 <output name="outputsam" file="test_25.sam" ftype="sam" compare="diff" lines_diff="2" /> |
| 790 </test> | 811 </test> |
| 791 <!-- 26) --> | 812 <!-- 26) --> |
| 792 <test> | 813 <test expect_num_outputs="1"> |
| 793 <param name="input" value="in_test_14.bam" ftype="bam" /> | 814 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 794 <conditional name="mode"> | 815 <conditional name="mode"> |
| 795 <param name="outtype" value="selected_reads" /> | 816 <param name="outtype" value="selected_reads" /> |
| 796 <section name="subsample_config"> | 817 <section name="subsample_config"> |
| 797 <conditional name="subsampling_mode"> | 818 <conditional name="subsampling_mode"> |
| 807 </conditional> | 828 </conditional> |
| 808 </conditional> | 829 </conditional> |
| 809 <output name="outputsam" file="test_26.bam" ftype="bam" lines_diff="2" /> | 830 <output name="outputsam" file="test_26.bam" ftype="bam" lines_diff="2" /> |
| 810 </test> | 831 </test> |
| 811 <!-- 27) --> | 832 <!-- 27) --> |
| 812 <test> | 833 <test expect_num_outputs="1"> |
| 813 <param name="input" value="in_test_14.bam" ftype="bam" /> | 834 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 814 <conditional name="mode"> | 835 <conditional name="mode"> |
| 815 <param name="outtype" value="selected_reads" /> | 836 <param name="outtype" value="selected_reads" /> |
| 816 <section name="subsample_config"> | 837 <section name="subsample_config"> |
| 817 <conditional name="subsampling_mode"> | 838 <conditional name="subsampling_mode"> |
| 827 </conditional> | 848 </conditional> |
| 828 </conditional> | 849 </conditional> |
| 829 <output name="outputsam" file="test_27.bam" ftype="bam" lines_diff="2"/> | 850 <output name="outputsam" file="test_27.bam" ftype="bam" lines_diff="2"/> |
| 830 </test> | 851 </test> |
| 831 <!-- 28) --> | 852 <!-- 28) --> |
| 832 <test> | 853 <test expect_num_outputs="1"> |
| 833 <param name="input" value="in_test_14.bam" ftype="bam" /> | 854 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 834 <conditional name="mode"> | 855 <conditional name="mode"> |
| 835 <param name="outtype" value="selected_reads" /> | 856 <param name="outtype" value="selected_reads" /> |
| 836 <section name="subsample_config"> | 857 <section name="subsample_config"> |
| 837 <conditional name="subsampling_mode"> | 858 <conditional name="subsampling_mode"> |
| 847 </conditional> | 868 </conditional> |
| 848 </conditional> | 869 </conditional> |
| 849 <output name="outputsam" file="test_28.bam" ftype="bam" lines_diff="2" /> | 870 <output name="outputsam" file="test_28.bam" ftype="bam" lines_diff="2" /> |
| 850 </test> | 871 </test> |
| 851 <!-- 29) --> | 872 <!-- 29) --> |
| 852 <test> | 873 <test expect_num_outputs="1"> |
| 853 <param name="input" value="in_test_14.bam" ftype="bam" /> | 874 <param name="input" value="in_test_14.bam" ftype="bam" /> |
| 854 <conditional name="mode"> | 875 <conditional name="mode"> |
| 855 <param name="outtype" value="selected_reads" /> | 876 <param name="outtype" value="selected_reads" /> |
| 856 <section name="subsample_config"> | 877 <section name="subsample_config"> |
| 857 <conditional name="subsampling_mode"> | 878 <conditional name="subsampling_mode"> |
| 868 </conditional> | 889 </conditional> |
| 869 </conditional> | 890 </conditional> |
| 870 <output name="outputsam" file="test_29.bam" ftype="bam" lines_diff="2"/> | 891 <output name="outputsam" file="test_29.bam" ftype="bam" lines_diff="2"/> |
| 871 </test> | 892 </test> |
| 872 <!-- 30) testing tag filtering --> | 893 <!-- 30) testing tag filtering --> |
| 873 <test> | 894 <test expect_num_outputs="1"> |
| 874 <param name="input" value="in_test_30.bam" ftype="bam" /> | 895 <param name="input" value="in_test_30.bam" ftype="bam" /> |
| 875 <conditional name="mode"> | 896 <conditional name="mode"> |
| 876 <param name="outtype" value="selected_reads" /> | 897 <param name="outtype" value="selected_reads" /> |
| 877 <section name="filter_config"> | 898 <section name="filter_config"> |
| 878 <param name="tag" value="XS:-18" /> | 899 <param name="tag" value="XS:-18" /> |
| 887 <has_text text="--tag 'XS:-18'"/> | 908 <has_text text="--tag 'XS:-18'"/> |
| 888 </assert_command> | 909 </assert_command> |
| 889 <output name="outputsam" file="test_30.bam" ftype="bam" lines_diff="2" /> | 910 <output name="outputsam" file="test_30.bam" ftype="bam" lines_diff="2" /> |
| 890 </test> | 911 </test> |
| 891 <!-- 31) testing readname filtering --> | 912 <!-- 31) testing readname filtering --> |
| 892 <test> | 913 <test expect_num_outputs="1"> |
| 893 <param name="input" value="in_test_30.bam" ftype="bam" /> | 914 <param name="input" value="in_test_30.bam" ftype="bam" /> |
| 894 <conditional name="mode"> | 915 <conditional name="mode"> |
| 895 <param name="outtype" value="selected_reads" /> | 916 <param name="outtype" value="selected_reads" /> |
| 896 <section name="filter_config"> | 917 <section name="filter_config"> |
| 897 <param name="qname_file" value="readnames.txt" /> | 918 <param name="qname_file" value="readnames.txt" /> |
| 904 </conditional> | 925 </conditional> |
| 905 <assert_command> | 926 <assert_command> |
| 906 <has_text text="--qname-file"/> | 927 <has_text text="--qname-file"/> |
| 907 </assert_command> | 928 </assert_command> |
| 908 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> | 929 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> |
| 930 </test> | |
| 931 <!-- 32) testing expression filters --> | |
| 932 <test expect_num_outputs="1"> | |
| 933 <param name="input" value="in_test_30.bam" ftype="bam"/> | |
| 934 <conditional name="mode"> | |
| 935 <param name="outtype" value="selected_reads" /> | |
| 936 <section name="filter_config"> | |
| 937 <conditional name="cond_expr"> | |
| 938 <param name="select_expr" value="yes"/> | |
| 939 <param name="expression" value="sclen>0"/> | |
| 940 </conditional> | |
| 941 </section> | |
| 942 <conditional name="output_options"> | |
| 943 <conditional name="output_format"> | |
| 944 <param name="oformat" value="bam" /> | |
| 945 </conditional> | |
| 946 </conditional> | |
| 947 </conditional> | |
| 948 <assert_command> | |
| 949 <has_text text="-e 'sclen>0'"/> | |
| 950 </assert_command> | |
| 951 <output name="outputsam" file="test_32.bam" ftype="bam" lines_diff="2" /> | |
| 952 </test> | |
| 953 <!-- 33) testing expression filters --> | |
| 954 <test expect_num_outputs="1"> | |
| 955 <param name="input" value="in_test_30.bam" ftype="bam"/> | |
| 956 <conditional name="mode"> | |
| 957 <param name="outtype" value="selected_reads" /> | |
| 958 <section name="filter_config"> | |
| 959 <conditional name="cond_expr"> | |
| 960 <param name="select_expr" value="yes"/> | |
| 961 <param name="expression" value='rname!="chr13"'/> | |
| 962 </conditional> | |
| 963 </section> | |
| 964 <conditional name="output_options"> | |
| 965 <conditional name="output_format"> | |
| 966 <param name="oformat" value="bam" /> | |
| 967 </conditional> | |
| 968 </conditional> | |
| 969 </conditional> | |
| 970 <assert_command> | |
| 971 <has_text text="-e 'rname!="/> | |
| 972 </assert_command> | |
| 973 <output name="outputsam" file="test_33.bam" ftype="bam" lines_diff="2" /> | |
| 909 </test> | 974 </test> |
| 910 </tests> | 975 </tests> |
| 911 <help> | 976 <help> |
| 912 **What it does** | 977 **What it does** |
| 913 | 978 |
| 988 | 1053 |
| 989 **Filtering by quality** | 1054 **Filtering by quality** |
| 990 | 1055 |
| 991 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. | 1056 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. |
| 992 | 1057 |
| 993 ## Filtering by Tag ** | 1058 **Filtering by Tag** |
| 994 | 1059 |
| 995 This filter allows selecting reads based on tool or user specific tags, e.g., XS:i:-18, the alignment score tag of bowtie. | 1060 This filter allows selecting reads based on tool or user specific tags, e.g., XS:i:-18, the alignment score tag of bowtie. |
| 996 Thus to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. | 1061 Thus to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. |
| 997 You can also write just STR1 without the value STR2, in which case the filter selects all reads with the tag STR1, e.g., XS. | 1062 You can also write just STR1 without the value STR2, in which case the filter selects all reads with the tag STR1, e.g., XS. |
| 998 | 1063 |
| 1064 **Filtering by Expression** | |
| 1065 | |
| 1066 | |
| 1067 Filter expressions are used as an on-the-fly checking of incoming SAM, BAM or CRAM records, discarding records that do not match the specified expression. | |
| 1068 | |
| 1069 The language used is primarily C style, but with a few differences in the precedence rules for bit operators and the inclusion of regular expression | |
| 1070 matching. | |
| 1071 | |
| 1072 The operator precedence, from strongest binding to weakest, is | |
| 1073 | |
| 1074 :: | |
| 1075 | |
| 1076 Grouping (, ) E.g. "(1+2)*3" | |
| 1077 Values: literals, vars Numbers, strings and variables | |
| 1078 Unary ops: +, -, !, ~ E.g. -10 +10, !10 (not), ~5 (bit not) | |
| 1079 Math ops: *, /, % Multiply, division and (integer) modulo | |
| 1080 Math ops: +, - Addition / subtraction | |
| 1081 Bit-wise: & Integer AND | |
| 1082 Bit-wise: ^ Integer XOR | |
| 1083 Bit-wise: | Integer OR | |
| 1084 Conditionals: >, >=, <, <= | |
| 1085 Equality: ==, !=, =~, !~ =~ and !~ match regular expressions | |
| 1086 Boolean: &&, || Logical AND / OR | |
| 1087 | |
| 1088 | |
| 1089 Expressions are computed using floating point mathematics, so "10 / 4" evaluates to 2.5 rather than 2. They may be written as integers in decimal or | |
| 1090 "0x" plus hexadecimal, and floating point with or without exponents. However, operations that require integers first do an implicit type conversion, so | |
| 1091 "7.9 % 5" is 2 and "7.9 & 4.1" is equivalent to "7 & 4", which is 4. Strings are always specified using double quotes. To get a double quote in a | |
| 1092 string, use backslash. Similarly a double backslash is used to get a literal backslash. For example ab\"c\\d is the string ab"c\d. | |
| 1093 | |
| 1094 Comparison operators are evaluated as a match being 1 and a mismatch being 0, thus "(2 > 1) + (3 < 5)" evaluates as 2. All comparisons involving undefined (null) values are deemed to be false. | |
| 1095 | |
| 1096 The variables are where the file format specifics are accessed from the expression. The variables correspond to SAM fields, for example to find paired | |
| 1097 alignments with high mapping quality and a very large insert size, we may use the expression "mapq >= 30 && (tlen >= 100000 || tlen <= -100000)". Valid | |
| 1098 variable names and their data types are: | |
| 1099 | |
| 1100 :: | |
| 1101 | |
| 1102 endpos int Alignment end position (1-based) | |
| 1103 flag int Combined FLAG field | |
| 1104 flag.paired int Single bit, 0 or 1 | |
| 1105 flag.proper_pair int Single bit, 0 or 2 | |
| 1106 flag.unmap int Single bit, 0 or 4 | |
| 1107 flag.munmap int Single bit, 0 or 8 | |
| 1108 flag.reverse int Single bit, 0 or 16 | |
| 1109 flag.mreverse int Single bit, 0 or 32 | |
| 1110 flag.read1 int Single bit, 0 or 64 | |
| 1111 flag.read2 int Single bit, 0 or 128 | |
| 1112 flag.secondary int Single bit, 0 or 256 | |
| 1113 flag.qcfail int Single bit, 0 or 512 | |
| 1114 flag.dup int Single bit, 0 or 1024 | |
| 1115 flag.supplementary int Single bit, 0 or 2048 | |
| 1116 hclen int Number of hard-clipped bases | |
| 1117 library string Library (LB header via RG) | |
| 1118 mapq int Mapping quality | |
| 1119 mpos int Synonym for pnext | |
| 1120 mrefid int Mate reference number (0 based) | |
| 1121 mrname string Synonym for rnext | |
| 1122 ncigar int Number of cigar operations | |
| 1123 pnext int Mate's alignment position (1-based) | |
| 1124 pos int Alignment position (1-based) | |
| 1125 qlen int Alignment length: no. query bases | |
| 1126 qname string Query name | |
| 1127 qual string Quality values (raw, 0 based) | |
| 1128 refid int Integer reference number (0 based) | |
| 1129 rlen int Alignment length: no. reference bases | |
| 1130 rname string Reference name | |
| 1131 rnext string Mate's reference name | |
| 1132 sclen int Number of soft-clipped bases | |
| 1133 seq string Sequence | |
| 1134 tlen int Template length (insert size) | |
| 1135 [XX] int / string XX tag value | |
| 1136 | |
| 1137 | |
| 1138 Flags are returned either as the whole flag value or by checking for a single bit. Hence the filter expression flag.dup is equivalent to flag & 1024. | |
| 1139 | |
| 1140 "qlen" and "rlen" are measured using the CIGAR string to count the number of query (sequence) and reference bases consumed. Note "qlen" may not exactly | |
| 1141 match the length of the "seq" field if the sequence is "*". | |
| 1142 | |
| 1143 "sclen" and "hclen" are the number of soft and hard-clipped bases respectively. The formula "qlen-sclen" gives the number of sequence bases used in the | |
| 1144 alignment, distinguishing between global alignment and local alignment length. | |
| 1145 | |
| 1146 "endpos" is the (1-based inclusive) position of the rightmost mapped base of the read, as measured using the CIGAR string, and for mapped reads is | |
| 1147 equivalent to "pos+rlen-1". For unmapped reads, it is the same as "pos". | |
| 1148 | |
| 1149 Reference names may be matched either by their string forms ("rname" and "mrname") or as the Nth @SQ line (counting from zero) as stored in BAM using | |
| 1150 "refid" and "mrefid" respectively. | |
| 1151 | |
| 1152 Auxiliary tags are described in square brackets and these expand to either integer or string as defined by the tag itself (XX:Z:string or XX:i:int). | |
| 1153 For example [NM]>=10 can be used to look for alignments with many mismatches and [RG]=~"grp[ABC]-" will match the read-group string. | |
| 1154 | |
| 1155 If no comparison is used with an auxiliary tag it is taken simply to be a test for the existence of that tag. So [NM] will return any record containing | |
| 1156 an NM tag, even if that tag is zero (NM:i:0). In htslib <= 1.15 negating this with ![NM] gave misleading results as it was true if the tag did not exist | |
| 1157 or did exist but was zero. Now this is strictly does-not-exist. An explicit exists([NM]) and !exists([NM]) function has also been added to make | |
| 1158 this intention clear. | |
| 1159 | |
| 1160 Similarly in htslib <= 1.15 using [NM]!=0 was true both when the tag existed and was not zero as well as when the tag did not exist. From 1.16 onwards | |
| 1161 all comparison operators are only true for tags that exist, so [NM]!=0 works as expected. | |
| 1162 | |
| 1163 Some simple functions are available to operate on strings. These treat the strings as arrays of bytes, permitting their length, minimum, maximum and | |
| 1164 average values to be computed. These are useful for processing Quality Scores. | |
| 1165 | |
| 1166 :: | |
| 1167 | |
| 1168 length(x) Length of the string (excluding nul char) | |
| 1169 min(x) Minimum byte value in the string | |
| 1170 max(x) Maximum byte value in the string | |
| 1171 avg(x) Average byte value in the string | |
| 1172 | |
| 1173 | |
| 1174 Note that "avg" is a floating point value and it may be NAN for empty strings. This means that "avg(qual)" does not produce an error for records that | |
| 1175 have both seq and qual of "*". NAN values will fail any conditional checks, so e.g. "avg(qual) > 20" works and will not report these records. NAN also | |
| 1176 fails all equality, < and > comparisons, and returns zero when given as an argument to the exists function. It can be negated with !x in which case it | |
| 1177 becomes true. | |
| 1178 | |
| 1179 Functions that operate on both strings and numerics: | |
| 1180 | |
| 1181 :: | |
| 1182 | |
| 1183 exists(x) True if the value exists (or is explicitly true). | |
| 1184 default(x,d) Value x if it exists or d if not. | |
| 1185 | |
| 1186 Functions that apply only to numeric values: | |
| 1187 | |
| 1188 :: | |
| 1189 | |
| 1190 sqrt(x) Square root of x | |
| 1191 log(x) Natural logarithm of x | |
| 1192 pow(x, y) Power function, x to the power of y | |
| 1193 exp(x) Base-e exponential, equivalent to pow(e,x) | |
| 1194 | |
| 999 </help> | 1195 </help> |
| 1000 <expand macro="citations"/> | 1196 <expand macro="citations"/> |
| 1001 </tool> | 1197 </tool> |
