Mercurial > repos > iuc > data_manager_salmon_index_builder
comparison data_manager/salmon_index_builder.xml @ 5:befb1da9de9a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_salmon_index_builder commit aed25572a6ac6a1f8acc72bb25ed3c337a623696
| author | iuc |
|---|---|
| date | Thu, 16 Oct 2025 20:11:34 +0000 |
| parents | eeb725655afc |
| children | 0b41709046cf |
comparison
equal
deleted
inserted
replaced
| 4:056c07f9900c | 5:befb1da9de9a |
|---|---|
| 1 <tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="1.3.0" profile="19.01"> | 1 <tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0"> |
| 2 <description>index builder</description> | 2 <description>index builder</description> |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">1.3.0</token> | |
| 5 <token name="@VERSION_SUFFIX@">1</token> | |
| 6 <token name="@PROFILE@">24.0</token> | |
| 7 <token name="@IDX_VERSION@">q7</token> | |
| 8 </macros> | |
| 3 <requirements> | 9 <requirements> |
| 4 <requirement type="package" version="1.3.0">salmon</requirement> | 10 <requirement type="package" version="@TOOL_VERSION@">salmon</requirement> |
| 5 <requirement type="package" version="3.7">python</requirement> | |
| 6 </requirements> | 11 </requirements> |
| 7 <macros> | |
| 8 <token name="@IDX_VERSION@">q7</token> | |
| 9 </macros> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
| 11 python '$__tool_directory__/salmon_index_builder.py' --output '${out_file}' | 13 ## https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/ |
| 12 --fasta_filename '${all_fasta_source.fields.path}' | 14 ## https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode |
| 13 --fasta_dbkey '${all_fasta_source.fields.dbkey}' | 15 |
| 14 --fasta_description '${all_fasta_source.fields.name}' | 16 #for $transcripts in $transcriptome.fields.path.split(",") |
| 15 --kmer_size "${kmer_size}" | 17 (zcat '$transcripts' 2>/dev/null || cat '$transcripts') >> gentrome.fa && |
| 16 --data_table_name salmon_indexes_versioned | 18 #end for |
| 17 --index_version @IDX_VERSION@ | 19 (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') >> gentrome.fa && |
| 18 ]]> | 20 |
| 19 </command> | 21 (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') | awk '{if($1 ~ /^>/) print $1}' | cut -c2- | tr -d " " > decoys.txt && |
| 22 | |
| 23 mkdir '$out_file.extra_files_path' && | |
| 24 | |
| 25 salmon index | |
| 26 -k $kmer_size | |
| 27 -t gentrome.fa | |
| 28 -d decoys.txt | |
| 29 -i '$out_file.extra_files_path' | |
| 30 -p "\${GALAXY_SLOTS:-12}" | |
| 31 $gencode | |
| 32 && | |
| 33 | |
| 34 cp '$dmjson' '$out_file' | |
| 35 ]]></command> | |
| 36 <configfiles> | |
| 37 <configfile name="dmjson"><![CDATA[{ | |
| 38 #if str($sequence_id).strip() == "" | |
| 39 #set sequence_id = $all_fasta_source.fields.dbkey | |
| 40 #end if | |
| 41 #if str($sequence_name).strip() == "" | |
| 42 #set sequence_name = $all_fasta_source.fields.dbkey | |
| 43 #end if | |
| 44 | |
| 45 "data_tables":{ | |
| 46 "salmon_indexes_versioned":[ | |
| 47 { | |
| 48 "value": "$sequence_id", | |
| 49 "dbkey": "$all_fasta_source.fields.dbkey", | |
| 50 "name": "$sequence_name", | |
| 51 "path": "$out_file.extra_files_path", | |
| 52 "version": "@IDX_VERSION@" | |
| 53 } | |
| 54 ] | |
| 55 } | |
| 56 }]]></configfile> | |
| 57 </configfiles> | |
| 20 <inputs> | 58 <inputs> |
| 21 <param label="Source FASTA Sequence" name="all_fasta_source" type="select"> | 59 <param label="Transcriptome sequences" name="transcriptome" optional="false" type="select"> |
| 60 <options from_data_table="transcriptomes" /> | |
| 61 </param> | |
| 62 <param label="Genome" name="all_fasta_source" optional="false" type="select"> | |
| 22 <options from_data_table="all_fasta" /> | 63 <options from_data_table="all_fasta" /> |
| 23 </param> | 64 </param> |
| 24 <param name="sequence_name" type="text" value="" label="Name of sequence" /> | 65 <param name="sequence_name" type="text" value="" label="Name of sequence" /> |
| 25 <param name="sequence_id" type="text" value="" label="ID for sequence" /> | 66 <param name="sequence_id" type="text" value="" label="ID for sequence" /> |
| 26 <param name="kmer_size" type="integer" optional='true' value="21" max="32" label="The size of the k-mer on which the index is built" | 67 <param name="kmer_size" type="integer" optional='true' value="31" max="32" label="The size of the k-mer on which the index is built" |
| 27 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "/> | 68 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "> |
| 69 <validator type="expression" message="Only odd values">value % 2 == 1</validator> | |
| 70 </param> | |
| 71 <param name="gencode" type="boolean" label="Transcript sequences are in gencode format" truevalue="--gencode" falsevalue="" checked="false" help="Will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF."/> | |
| 28 </inputs> | 72 </inputs> |
| 29 <outputs> | 73 <outputs> |
| 30 <data name="out_file" format="data_manager_json" /> | 74 <data name="out_file" format="data_manager_json" /> |
| 31 </outputs> | 75 </outputs> |
| 32 <tests> | 76 <tests> |
| 33 <test> | 77 <test> |
| 78 <param name="transcriptome" value="phiX174"/> | |
| 34 <param name="all_fasta_source" value="phiX174"/> | 79 <param name="all_fasta_source" value="phiX174"/> |
| 35 <param name="sequence_name" value="sequence_name"/> | 80 <param name="sequence_name" value="sequence_name"/> |
| 36 <param name="sequence_id" value="sequence_id"/> | 81 <param name="sequence_id" value="sequence_id"/> |
| 37 <output name="out_file"> | 82 <output name="out_file"> |
| 38 <assert_contents> | 83 <assert_contents> |
| 39 <has_line line='{"data_tables": {"salmon_indexes_versioned": [{"dbkey": "phiX174", "name": "sequence_name", "path": "sequence_id", "value": "sequence_id", "version": "q7"}]}}' /> | 84 <has_text text='"salmon_indexes_versioned"' /> |
| 85 <has_text text='"dbkey": "phiX174"' /> | |
| 86 <has_text text='"name": "sequence_name"' /> | |
| 87 <has_text text='"value": "sequence_id"' /> | |
| 88 <has_text text='"version": "q7"' /> | |
| 89 <has_text text='"path":' /> | |
| 90 </assert_contents> | |
| 91 </output> | |
| 92 </test> | |
| 93 <test> | |
| 94 <param name="transcriptome" value="phiX174"/> | |
| 95 <param name="all_fasta_source" value="phiX174"/> | |
| 96 <param name="sequence_name" value=""/> | |
| 97 <param name="sequence_id" value=""/> | |
| 98 <output name="out_file"> | |
| 99 <assert_contents> | |
| 100 <has_text text='"salmon_indexes_versioned"' /> | |
| 101 <has_text text='"dbkey": "phiX174"' /> | |
| 102 <has_text text='"name": "phiX174"' /> | |
| 103 <has_text text='"value": "phiX174"' /> | |
| 104 <has_text text='"version": "q7"' /> | |
| 105 <has_text text='"path":' /> | |
| 40 </assert_contents> | 106 </assert_contents> |
| 41 </output> | 107 </output> |
| 42 </test> | 108 </test> |
| 43 </tests> | 109 </tests> |
| 44 <help> | 110 <help> |
| 45 <![CDATA[ | 111 <![CDATA[ |
| 46 .. class:: infomark | 112 .. class:: infomark |
| 47 | 113 |
| 48 **Notice:** If you leave name, description, or id blank, it will be generated automatically. | 114 Indices are constructed as described here: https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/ |
| 115 | |
| 116 See also https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode | |
| 117 | |
| 118 **Notice:** If you leave name, description, or id blank, the dbkey of the genome will be used. | |
| 49 ]]> | 119 ]]> |
| 50 </help> | 120 </help> |
| 51 <citations> | 121 <citations> |
| 52 <citation type="doi">https://doi.org/10.1038/nmeth.4197</citation> | 122 <citation type="doi">https://doi.org/10.1038/nmeth.4197</citation> |
| 53 </citations> | 123 </citations> |
