Mercurial > repos > iuc > salsa
comparison salsa2.xml @ 4:41c4e48b0617 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/salsa2 commit 41b8952cd0739db0b5eb62d5dd9ccc7bc0f1925f
| author | iuc |
|---|---|
| date | Thu, 19 May 2022 14:16:51 +0000 |
| parents | 5af503c47367 |
| children | d6713454590c |
comparison
equal
deleted
inserted
replaced
| 3:5af503c47367 | 4:41c4e48b0617 |
|---|---|
| 1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | 1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> |
| 2 <description>scaffold long read assemblies with Hi-C</description> | 2 <description>scaffold long read assemblies with Hi-C</description> |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">2.3</token> | |
| 5 <token name="@VERSION_SUFFIX@">3</token> | |
| 6 </macros> | |
| 3 <xrefs> | 7 <xrefs> |
| 4 <xref type="bio.tools">SALSA</xref> | 8 <xref type="bio.tools">SALSA</xref> |
| 5 </xrefs> | 9 </xrefs> |
| 6 <macros> | |
| 7 <token name="@TOOL_VERSION@">2.3</token> | |
| 8 <token name="@VERSION_SUFFIX@">2</token> | |
| 9 </macros> | |
| 10 <requirements> | 10 <requirements> |
| 11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement> | 11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement> |
| 12 <requirement type="package" version="1.11">samtools</requirement> | 12 <requirement type="package" version="1.11">samtools</requirement> |
| 13 </requirements> | 13 </requirements> |
| 14 <command detect_errors="exit_code"><![CDATA[ | 14 <command detect_errors="exit_code"><![CDATA[ |
| 37 -g '$gfa_file' | 37 -g '$gfa_file' |
| 38 #end if | 38 #end if |
| 39 #if $iter: | 39 #if $iter: |
| 40 -i '$iter' | 40 -i '$iter' |
| 41 #end if | 41 #end if |
| 42 #if $gensize: | |
| 43 -s '$gensize' | |
| 44 #end if | |
| 45 -m '$clean' | |
| 42 -o ./out | 46 -o ./out |
| 43 ]]></command> | 47 ]]></command> |
| 44 <inputs> | 48 <inputs> |
| 45 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/> | 49 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/> |
| 46 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. | 50 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. |
| 47 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from | 51 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from |
| 48 the Bedtools package."/> | 52 the Bedtools package."/> |
| 49 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/> | 53 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/> |
| 50 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" | 54 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" |
| 51 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/> | 55 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/> |
| 52 <conditional name="enzyme_conditional"> | 56 <conditional name="enzyme_conditional"> |
| 53 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. | 57 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. |
| 54 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual | 58 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual |
| 55 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you | 59 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you |
| 56 use an enzyme-free prep, e.g. Omni-C."> | 60 use an enzyme-free prep, e.g. Omni-C."> |
| 57 <option value="preconfigured">Preconfigured restriction enzymes</option> | 61 <option value="preconfigured">Preconfigured restriction enzymes</option> |
| 58 <option value="specific">Enter a specific sequence</option> | 62 <option value="specific">Enter a specific sequence</option> |
| 59 </param> | 63 </param> |
| 60 <when value="preconfigured"> | 64 <when value="preconfigured"> |
| 69 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> | 73 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> |
| 70 <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator> | 74 <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator> |
| 71 </param> | 75 </param> |
| 72 </when> | 76 </when> |
| 73 </conditional> | 77 </conditional> |
| 74 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" | 78 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" |
| 75 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will | 79 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will |
| 76 potentially increase the number of joins, however it could also introduce additional misjoins"/> | 80 potentially increase the number of joins, however it could also introduce additional misjoins"/> |
| 81 <param name="clean" argument="-m" type="boolean" label="Clean Assembly" checked="false" truevalue='yes' falsevalue="no" help="Set this option to 'yes' if you want to find misassemblies in input assembly" /> | |
| 82 <param name="gensize" argument="-s" type="integer" label="Expected Genome Size" optional="true" help="Expected Genome size of the assembled genome. If not set, Salsa will estimate genome size." /> | |
| 77 </inputs> | 83 </inputs> |
| 78 <outputs> | 84 <outputs> |
| 79 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/> | 85 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/> |
| 80 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/> | 86 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/> |
| 81 </outputs> | 87 </outputs> |
| 85 <param name="length" value="test.fai"/> | 91 <param name="length" value="test.fai"/> |
| 86 <param name="bed_file" value="test.bed"/> | 92 <param name="bed_file" value="test.bed"/> |
| 87 <param name="gfa_file" value="test.gfa1"/> | 93 <param name="gfa_file" value="test.gfa1"/> |
| 88 <conditional name="enzyme_conditional"> | 94 <conditional name="enzyme_conditional"> |
| 89 <param name="enzyme_options" value="specific"/> | 95 <param name="enzyme_options" value="specific"/> |
| 90 <param name="manual_enzyme" value="GATC,GANTC"/> | 96 <param name="manual_enzyme" value="GATC,GANTC"/> |
| 91 </conditional> | 97 </conditional> |
| 92 <param name="enzyme" value="GATC,GANTC"/> | 98 <param name="enzyme" value="GATC,GANTC"/> |
| 93 <param name="cutoff" value="1000"/> | 99 <param name="cutoff" value="1000"/> |
| 94 <param name="iter" value="3"/> | 100 <param name="iter" value="3"/> |
| 101 <param name="clean" value="yes"/> | |
| 95 <output name="scaffolds_fasta" file="out.fasta"/> | 102 <output name="scaffolds_fasta" file="out.fasta"/> |
| 96 <output name="scaffolds_agp" file="out.agp"/> | 103 <output name="scaffolds_agp" file="out.agp"/> |
| 97 </test> | 104 </test> |
| 98 <!--Test manual enzyme--> | 105 <!--Test manual enzyme--> |
| 99 <test> | 106 <test> |
| 100 <param name="fasta_in" value="test.fasta"/> | 107 <param name="fasta_in" value="test.fasta"/> |
| 101 <param name="bed_file" value="test.bed"/> | 108 <param name="bed_file" value="test.bed"/> |
| 102 <param name="gfa_file" value="test.gfa1"/> | 109 <param name="gfa_file" value="test.gfa1"/> |
| 103 <conditional name="enzyme_conditional"> | 110 <conditional name="enzyme_conditional"> |
| 104 <param name="enzyme_options" value="specific"/> | 111 <param name="enzyme_options" value="specific"/> |
| 105 <param name="manual_enzyme" value="GATC,GANTC"/> | 112 <param name="manual_enzyme" value="GATC,GANTC"/> |
| 106 </conditional> | 113 </conditional> |
| 107 <param name="cutoff" value="1000"/> | 114 <param name="cutoff" value="1000"/> |
| 108 <param name="iter" value="3"/> | 115 <param name="iter" value="3"/> |
| 116 <param name="clean" value="yes"/> | |
| 109 <output name="scaffolds_fasta" file="out.fasta"/> | 117 <output name="scaffolds_fasta" file="out.fasta"/> |
| 110 <output name="scaffolds_agp" file="out.agp"/> | 118 <output name="scaffolds_agp" file="out.agp"/> |
| 111 </test> | 119 </test> |
| 112 <!--Test predefined enzyme--> | 120 <!--Test predefined enzyme--> |
| 113 <test> | 121 <test> |
| 114 <param name="fasta_in" value="test.fasta"/> | 122 <param name="fasta_in" value="test.fasta"/> |
| 115 <param name="bed_file" value="test.bed"/> | 123 <param name="bed_file" value="test.bed"/> |
| 116 <param name="gfa_file" value="test.gfa1"/> | 124 <param name="gfa_file" value="test.gfa1"/> |
| 117 <conditional name="enzyme_conditional"> | 125 <conditional name="enzyme_conditional"> |
| 118 <param name="enzyme_options" value="preconfigured"/> | 126 <param name="enzyme_options" value="preconfigured"/> |
| 119 <param name="preconfigured_enzymes" value="arima1"/> | 127 <param name="preconfigured_enzymes" value="arima1"/> |
| 120 </conditional> | 128 </conditional> |
| 121 <param name="cutoff" value="1000"/> | 129 <param name="cutoff" value="1000"/> |
| 122 <param name="iter" value="3"/> | 130 <param name="iter" value="3"/> |
| 131 <param name="clean" value="yes"/> | |
| 123 <output name="scaffolds_fasta" file="out.fasta"/> | 132 <output name="scaffolds_fasta" file="out.fasta"/> |
| 124 <output name="scaffolds_agp" file="out.agp"/> | 133 <output name="scaffolds_agp" file="out.agp"/> |
| 125 </test> | 134 </test> |
| 126 </tests> | 135 </tests> |
| 127 <help><![CDATA[ | 136 <help><![CDATA[ |
| 136 | 145 |
| 137 .. class:: infomark | 146 .. class:: infomark |
| 138 | 147 |
| 139 **Mapping reads** | 148 **Mapping reads** |
| 140 | 149 |
| 141 To start the scaffolding, the first step is to map reads to the assembly. We recommend using `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ | 150 To start the scaffolding, the first step is to map reads to the assembly. We recommend using `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ |
| 142 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires | 151 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires |
| 143 a BED file as the input. The conversion can be done using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the | 152 a BED file as the input. The conversion can be done using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the |
| 144 read name, rather than the alignment coordinates. Once you have the bam file, you can run the following commands to get the BED file needed as an input to SALSA. | 153 read name, rather than the alignment coordinates. Once you have the bam file, you can run the following commands to get the BED file needed as an input to SALSA. |
| 145 | 154 |
| 146 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess | 155 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess |
| 147 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_. | 156 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_. |
| 148 | 157 |
| 149 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_. | 158 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_. |
| 150 | 159 |
| 151 ]]></help> | 160 ]]></help> |
