comparison gene_family_classifier.xml @ 30:e9d9632762ca draft

Uploaded
author greg
date Mon, 30 Jan 2017 14:48:51 -0500
parents 59d1ec149c64
children 80e9d6e098ec
comparison
equal deleted inserted replaced
29:59d1ec149c64 30:e9d9632762ca
11 <regex match="Error:" /> 11 <regex match="Error:" />
12 <regex match="Exception:" /> 12 <regex match="Exception:" />
13 </stdio> 13 </stdio>
14 <command> 14 <command>
15 <![CDATA[ 15 <![CDATA[
16 #set output_label = ""
16 #if str($options_type.options_type_selector) == 'advanced': 17 #if str($options_type.options_type_selector) == 'advanced':
17 #set create_orthogroup_cond = $options_type.create_orthogroup_cond 18 #set create_orthogroup_cond = $options_type.create_orthogroup_cond
18 #set create_orthogroup = $create_orthogroup_cond.create_orthogroup 19 #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
19 20
20 #if str($create_orthogroup) == 'yes': 21 #if str($create_orthogroup) == 'yes':
39 #set create_corresponding_coding_sequences = False 40 #set create_corresponding_coding_sequences = False
40 #end if 41 #end if
41 42
42 GeneFamilyClassifier 43 GeneFamilyClassifier
43 --proteins "$input" 44 --proteins "$input"
45 --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
44 --scaffold "$scaffold" 46 --scaffold "$scaffold"
45 --method $method 47 --method $method
46 --classifier $save_hmmscan_log_cond.classifier 48 --classifier $save_hmmscan_log_cond.classifier
49 --num_threads \${GALAXY_SLOTS:-4}
47 50
48 #if str($options_type.options_type_selector) == 'advanced': 51 #if str($options_type.options_type_selector) == 'advanced':
49 --super_orthogroups $options_type.super_orthogroups 52 --super_orthogroups $options_type.super_orthogroups
50 #if str($options_type.single_copy_cond) == 'taxa': 53 #if str($options_type.single_copy_cond) == 'taxa':
51 --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa 54 --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa
52 --taxa_present $options_type.single_copy_cond.taxa_present 55 --taxa_present $options_type.single_copy_cond.taxa_present
53 #end if 56 #end if
54 #if str($create_orthogroup) == 'yes': 57 #if str($create_orthogroup) == 'yes':
55 --orthogroup_fasta 58 --orthogroup_fasta
56 #if $create_corresponding_coding_sequences: 59 #if $create_corresponding_coding_sequences:
60 #set output_label = "Gene family clusters with corresponding coding sequences"
57 --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences" 61 --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences"
62 #else:
63 #set output_label = "Gene family clusters"
58 #end if 64 #end if
59 #end if 65 #end if
60 #end if 66 #end if
61
62 --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
63 --num_threads \${GALAXY_SLOTS:-4}
64 67
65 #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both': 68 #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both':
66 #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes': 69 #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
67 && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log 70 && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log
68 #else: 71 #else:
70 #end if 73 #end if
71 #end if 74 #end if
72 75
73 #if $create_ortho_sequences: 76 #if $create_ortho_sequences:
74 #if $create_corresponding_coding_sequences: 77 #if $create_corresponding_coding_sequences:
75 && echo "Sequences classified into precomputed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output 78 && echo "# Precomputed orthologous gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
76 #else: 79 #else:
77 && echo "Sequences classified into precomputed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output 80 && echo "# Precomputed orthologous gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
78 #end if 81 #end if
79 && ls -al $orthogroups_fasta_src_dir | grep f >> $output 82 && ls -al $orthogroups_fasta_src_dir | grep f >> $output
80 && mv $orthogroups_fasta_src_dir/* $dest_dir || true 83 && mv $orthogroups_fasta_src_dir/* $dest_dir || true
81 #end if 84 #end if
82 85
159 </inputs> 162 </inputs>
160 <outputs> 163 <outputs>
161 <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}"> 164 <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}">
162 <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter> 165 <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
163 </data> 166 </data>
164 <data name="output" format="txt" label="Sequences classified into gene family clusters on ${on_string}"> 167 <data name="output" format="txt" label="${output_label} on ${on_string}">
165 <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter> 168 <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter>
166 </data> 169 </data>
167 <collection name="transcripts" type="list"> 170 <collection name="orthos" type="list">
168 <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" /> 171 <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
169 </collection> 172 </collection>
170 </outputs> 173 </outputs>
171 <tests> 174 <tests>
172 <test> 175 <test>
173 <param name="input" value="" ftype="fasta" /> 176 <!-- Not sure how to test this since the tool requires scaffolds data which is extremely large and installed using a Data Manager -->
174 <param name="prediction_method" value="transdecoder"/> 177 <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
175 <param name="target_gene_family_assembly" value="no"/> 178 <param name="scaffold" value="22Gv1.1"/>
176 <param name="strand_specific" value="yes"/> 179 <param name="method" value="orthomcl"/>
180 <param name="classifier" value="blastp"/>
177 <param name="dereplicate" value="yes"/> 181 <param name="dereplicate" value="yes"/>
178 <param name="min_length" value="200"/> 182 <param name="min_length" value="200"/>
179 <output_collection name="orthos" type="list"> 183 <output_collection name="orthos" type="list">
180 184 <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
185 <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
186 <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
181 </output_collection> 187 </output_collection>
182 </test> 188 </test>
183 </tests> 189 </tests>
184 <help> 190 <help>
185 This tool is part of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of 191 This tool is part of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
186 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene 192 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene
187 sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive 193 sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive
188 to remote homologs) or both (more exhaustive). 194 to remote homologs) or both (more exhaustive).
189 195
190 This tool accepts any of the following as input. 196 This tool accepts any of the following as input:
191 197
192 * the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool 198 * the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool
193 * externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly 199 * externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly
194 * gene predictions from a sequenced genome 200 * gene predictions from a sequenced genome
195 201