comparison data_manager/salmon_index_builder.xml @ 5:befb1da9de9a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_salmon_index_builder commit aed25572a6ac6a1f8acc72bb25ed3c337a623696
author iuc
date Thu, 16 Oct 2025 20:11:34 +0000
parents eeb725655afc
children 0b41709046cf
comparison
equal deleted inserted replaced
4:056c07f9900c 5:befb1da9de9a
1 <tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="1.3.0" profile="19.01"> 1 <tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0">
2 <description>index builder</description> 2 <description>index builder</description>
3 <macros>
4 <token name="@TOOL_VERSION@">1.3.0</token>
5 <token name="@VERSION_SUFFIX@">1</token>
6 <token name="@PROFILE@">24.0</token>
7 <token name="@IDX_VERSION@">q7</token>
8 </macros>
3 <requirements> 9 <requirements>
4 <requirement type="package" version="1.3.0">salmon</requirement> 10 <requirement type="package" version="@TOOL_VERSION@">salmon</requirement>
5 <requirement type="package" version="3.7">python</requirement>
6 </requirements> 11 </requirements>
7 <macros>
8 <token name="@IDX_VERSION@">q7</token>
9 </macros>
10 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
11 python '$__tool_directory__/salmon_index_builder.py' --output '${out_file}' 13 ## https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/
12 --fasta_filename '${all_fasta_source.fields.path}' 14 ## https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode
13 --fasta_dbkey '${all_fasta_source.fields.dbkey}' 15
14 --fasta_description '${all_fasta_source.fields.name}' 16 #for $transcripts in $transcriptome.fields.path.split(",")
15 --kmer_size "${kmer_size}" 17 (zcat '$transcripts' 2>/dev/null || cat '$transcripts') >> gentrome.fa &&
16 --data_table_name salmon_indexes_versioned 18 #end for
17 --index_version @IDX_VERSION@ 19 (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') >> gentrome.fa &&
18 ]]> 20
19 </command> 21 (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') | awk '{if($1 ~ /^>/) print $1}' | cut -c2- | tr -d " " > decoys.txt &&
22
23 mkdir '$out_file.extra_files_path' &&
24
25 salmon index
26 -k $kmer_size
27 -t gentrome.fa
28 -d decoys.txt
29 -i '$out_file.extra_files_path'
30 -p "\${GALAXY_SLOTS:-12}"
31 $gencode
32 &&
33
34 cp '$dmjson' '$out_file'
35 ]]></command>
36 <configfiles>
37 <configfile name="dmjson"><![CDATA[{
38 #if str($sequence_id).strip() == ""
39 #set sequence_id = $all_fasta_source.fields.dbkey
40 #end if
41 #if str($sequence_name).strip() == ""
42 #set sequence_name = $all_fasta_source.fields.dbkey
43 #end if
44
45 "data_tables":{
46 "salmon_indexes_versioned":[
47 {
48 "value": "$sequence_id",
49 "dbkey": "$all_fasta_source.fields.dbkey",
50 "name": "$sequence_name",
51 "path": "$out_file.extra_files_path",
52 "version": "@IDX_VERSION@"
53 }
54 ]
55 }
56 }]]></configfile>
57 </configfiles>
20 <inputs> 58 <inputs>
21 <param label="Source FASTA Sequence" name="all_fasta_source" type="select"> 59 <param label="Transcriptome sequences" name="transcriptome" optional="false" type="select">
60 <options from_data_table="transcriptomes" />
61 </param>
62 <param label="Genome" name="all_fasta_source" optional="false" type="select">
22 <options from_data_table="all_fasta" /> 63 <options from_data_table="all_fasta" />
23 </param> 64 </param>
24 <param name="sequence_name" type="text" value="" label="Name of sequence" /> 65 <param name="sequence_name" type="text" value="" label="Name of sequence" />
25 <param name="sequence_id" type="text" value="" label="ID for sequence" /> 66 <param name="sequence_id" type="text" value="" label="ID for sequence" />
26 <param name="kmer_size" type="integer" optional='true' value="21" max="32" label="The size of the k-mer on which the index is built" 67 <param name="kmer_size" type="integer" optional='true' value="31" max="32" label="The size of the k-mer on which the index is built"
27 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "/> 68 help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE ">
69 <validator type="expression" message="Only odd values">value % 2 == 1</validator>
70 </param>
71 <param name="gencode" type="boolean" label="Transcript sequences are in gencode format" truevalue="--gencode" falsevalue="" checked="false" help="Will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF."/>
28 </inputs> 72 </inputs>
29 <outputs> 73 <outputs>
30 <data name="out_file" format="data_manager_json" /> 74 <data name="out_file" format="data_manager_json" />
31 </outputs> 75 </outputs>
32 <tests> 76 <tests>
33 <test> 77 <test>
78 <param name="transcriptome" value="phiX174"/>
34 <param name="all_fasta_source" value="phiX174"/> 79 <param name="all_fasta_source" value="phiX174"/>
35 <param name="sequence_name" value="sequence_name"/> 80 <param name="sequence_name" value="sequence_name"/>
36 <param name="sequence_id" value="sequence_id"/> 81 <param name="sequence_id" value="sequence_id"/>
37 <output name="out_file"> 82 <output name="out_file">
38 <assert_contents> 83 <assert_contents>
39 <has_line line='{"data_tables": {"salmon_indexes_versioned": [{"dbkey": "phiX174", "name": "sequence_name", "path": "sequence_id", "value": "sequence_id", "version": "q7"}]}}' /> 84 <has_text text='"salmon_indexes_versioned"' />
85 <has_text text='"dbkey": "phiX174"' />
86 <has_text text='"name": "sequence_name"' />
87 <has_text text='"value": "sequence_id"' />
88 <has_text text='"version": "q7"' />
89 <has_text text='"path":' />
90 </assert_contents>
91 </output>
92 </test>
93 <test>
94 <param name="transcriptome" value="phiX174"/>
95 <param name="all_fasta_source" value="phiX174"/>
96 <param name="sequence_name" value=""/>
97 <param name="sequence_id" value=""/>
98 <output name="out_file">
99 <assert_contents>
100 <has_text text='"salmon_indexes_versioned"' />
101 <has_text text='"dbkey": "phiX174"' />
102 <has_text text='"name": "phiX174"' />
103 <has_text text='"value": "phiX174"' />
104 <has_text text='"version": "q7"' />
105 <has_text text='"path":' />
40 </assert_contents> 106 </assert_contents>
41 </output> 107 </output>
42 </test> 108 </test>
43 </tests> 109 </tests>
44 <help> 110 <help>
45 <![CDATA[ 111 <![CDATA[
46 .. class:: infomark 112 .. class:: infomark
47 113
48 **Notice:** If you leave name, description, or id blank, it will be generated automatically. 114 Indices are constructed as described here: https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/
115
116 See also https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode
117
118 **Notice:** If you leave name, description, or id blank, the dbkey of the genome will be used.
49 ]]> 119 ]]>
50 </help> 120 </help>
51 <citations> 121 <citations>
52 <citation type="doi">https://doi.org/10.1038/nmeth.4197</citation> 122 <citation type="doi">https://doi.org/10.1038/nmeth.4197</citation>
53 </citations> 123 </citations>