data_manager_salmon_index_builder: data_manager/salmon_index

comparison data_manager/salmon_index_builder.xml @ 5:befb1da9de9a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_salmon_index_builder commit aed25572a6ac6a1f8acc72bb25ed3c337a623696

author	iuc
date	Thu, 16 Oct 2025 20:11:34 +0000
parents	eeb725655afc
children	0b41709046cf

comparison

equal deleted inserted replaced

-:056c07f9900c
+:befb1da9de9a
-<tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="1.3.0" profile="19.01">
+<!-- Tool version and profile are assembled from the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens declared in <macros> below. -->
+<tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
 <description>index builder</description>
+<macros>
+<token name="@TOOL_VERSION@">1.3.0</token>
+<token name="@VERSION_SUFFIX@">1</token>
+<token name="@PROFILE@">24.0</token>
+<token name="@IDX_VERSION@">q7</token>
+</macros>
 <requirements>
-<requirement type="package" version="1.3.0">salmon</requirement>
+<requirement type="package" version="@TOOL_VERSION@">salmon</requirement>
-<requirement type="package" version="3.7">python</requirement>
 </requirements>
-<macros>
-<token name="@IDX_VERSION@">q7</token>
-</macros>
 <command detect_errors="exit_code"><![CDATA[
-python '$__tool_directory__/salmon_index_builder.py' --output '${out_file}'
+## https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/
---fasta_filename '${all_fasta_source.fields.path}'
+## https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode
---fasta_dbkey '${all_fasta_source.fields.dbkey}'
---fasta_description '${all_fasta_source.fields.name}'
+#for $transcripts in $transcriptome.fields.path.split(",")
---kmer_size "${kmer_size}"
+(zcat '$transcripts' 2>/dev/null || cat '$transcripts') >> gentrome.fa &&
---data_table_name salmon_indexes_versioned
+#end for
---index_version @IDX_VERSION@
+(zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') >> gentrome.fa &&
-]]>
-</command>
+(zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') | awk '{if($1 ~ /^>/) print $1}' | cut -c2- | tr -d " " > decoys.txt &&
+mkdir '$out_file.extra_files_path' &&
+salmon index
+-k $kmer_size
+-t gentrome.fa
+-d decoys.txt
+-i '$out_file.extra_files_path'
+-p "\${GALAXY_SLOTS:-12}"
+$gencode
+&&
+cp '$dmjson' '$out_file'
+]]></command>
+<configfiles>
+<configfile name="dmjson"><![CDATA[{
+#if str($sequence_id).strip() == ""
+#set sequence_id = $all_fasta_source.fields.dbkey
+#end if
+#if str($sequence_name).strip() == ""
+#set sequence_name = $all_fasta_source.fields.dbkey
+#end if
+"data_tables":{
+"salmon_indexes_versioned":[
+{
+"value": "$sequence_id",
+"dbkey": "$all_fasta_source.fields.dbkey",
+"name": "$sequence_name",
+"path": "$out_file.extra_files_path",
+"version": "@IDX_VERSION@"
+}
+]
+}
+}]]></configfile>
+</configfiles>
 <inputs>
-<param label="Source FASTA Sequence" name="all_fasta_source" type="select">
+<param label="Transcriptome sequences" name="transcriptome" optional="false" type="select">
+<options from_data_table="transcriptomes" />
+</param>
+<param label="Genome" name="all_fasta_source" optional="false" type="select">
 <options from_data_table="all_fasta" />
 </param>
 <param name="sequence_name" type="text" value="" label="Name of sequence" />
 <param name="sequence_id" type="text" value="" label="ID for sequence" />
-<param name="kmer_size" type="integer" optional='true' value="21" max="32" label="The size of the k-mer on which the index is built"
+<param name="kmer_size" type="integer" optional='true' value="31" max="32" label="The size of the k-mer on which the index is built"
-help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be.  We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "/>
+help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be.  We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE ">
+<validator type="expression" message="Only odd values">value % 2 == 1</validator>
+</param>
+<param name="gencode" type="boolean" label="Transcript sequences are in gencode format" truevalue="--gencode" falsevalue="" checked="false" help="Will split  the transcript name at the first '|' character. These reduced names will be used in the output  and when looking for these transcripts in a gene to transcript GTF."/>
 </inputs>
 <outputs>
 <data name="out_file" format="data_manager_json" />
 </outputs>
 <tests>
 <test>
+<param name="transcriptome" value="phiX174"/>
 <param name="all_fasta_source" value="phiX174"/>
 <param name="sequence_name" value="sequence_name"/>
 <param name="sequence_id" value="sequence_id"/>
 <output name="out_file">
 <assert_contents>
-<has_line line='{"data_tables": {"salmon_indexes_versioned": [{"dbkey": "phiX174", "name": "sequence_name", "path": "sequence_id", "value": "sequence_id", "version": "q7"}]}}' />
+<has_text text='"salmon_indexes_versioned"' />
+<has_text text='"dbkey": "phiX174"' />
+<has_text text='"name": "sequence_name"' />
+<has_text text='"value": "sequence_id"' />
+<has_text text='"version": "q7"' />
+<has_text text='"path":' />
+</assert_contents>
+</output>
+</test>
+<test>
+<param name="transcriptome" value="phiX174"/>
+<param name="all_fasta_source" value="phiX174"/>
+<param name="sequence_name" value=""/>
+<param name="sequence_id" value=""/>
+<output name="out_file">
+<assert_contents>
+<has_text text='"salmon_indexes_versioned"' />
+<has_text text='"dbkey": "phiX174"' />
+<has_text text='"name": "phiX174"' />
+<has_text text='"value": "phiX174"' />
+<has_text text='"version": "q7"' />
+<has_text text='"path":' />
 </assert_contents>
 </output>
 </test>
 </tests>
 <help>
 <![CDATA[
 .. class:: infomark
-**Notice:** If you leave name, description, or id blank, it will be generated automatically.
+Indices are constructed as described here: https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/
+See also https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode
+**Notice:** If you leave the name or id blank, the dbkey of the genome will be used.
 ]]>
 </help>
 <citations>
 <citation type="doi">https://doi.org/10.1038/nmeth.4197</citation>
 </citations>

Mercurial > repos > iuc > data_manager_salmon_index_builder

comparison data_manager/salmon_index_builder.xml @ 5:befb1da9de9a draft