Mercurial > repos > greg > gene_family_classifier
comparison gene_family_classifier.xml @ 30:e9d9632762ca draft
Uploaded
author | greg |
---|---|
date | Mon, 30 Jan 2017 14:48:51 -0500 |
parents | 59d1ec149c64 |
children | 80e9d6e098ec |
comparison
equal
deleted
inserted
replaced
29:59d1ec149c64 | 30:e9d9632762ca |
---|---|
11 <regex match="Error:" /> | 11 <regex match="Error:" /> |
12 <regex match="Exception:" /> | 12 <regex match="Exception:" /> |
13 </stdio> | 13 </stdio> |
14 <command> | 14 <command> |
15 <![CDATA[ | 15 <![CDATA[ |
16 #set output_label = "" | |
16 #if str($options_type.options_type_selector) == 'advanced': | 17 #if str($options_type.options_type_selector) == 'advanced': |
17 #set create_orthogroup_cond = $options_type.create_orthogroup_cond | 18 #set create_orthogroup_cond = $options_type.create_orthogroup_cond |
18 #set create_orthogroup = $create_orthogroup_cond.create_orthogroup | 19 #set create_orthogroup = $create_orthogroup_cond.create_orthogroup |
19 | 20 |
20 #if str($create_orthogroup) == 'yes': | 21 #if str($create_orthogroup) == 'yes': |
39 #set create_corresponding_coding_sequences = False | 40 #set create_corresponding_coding_sequences = False |
40 #end if | 41 #end if |
41 | 42 |
42 GeneFamilyClassifier | 43 GeneFamilyClassifier |
43 --proteins "$input" | 44 --proteins "$input" |
45 --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" | |
44 --scaffold "$scaffold" | 46 --scaffold "$scaffold" |
45 --method $method | 47 --method $method |
46 --classifier $save_hmmscan_log_cond.classifier | 48 --classifier $save_hmmscan_log_cond.classifier |
49 --num_threads \${GALAXY_SLOTS:-4} | |
47 | 50 |
48 #if str($options_type.options_type_selector) == 'advanced': | 51 #if str($options_type.options_type_selector) == 'advanced': |
49 --super_orthogroups $options_type.super_orthogroups | 52 --super_orthogroups $options_type.super_orthogroups |
50 #if str($options_type.single_copy_cond) == 'taxa': | 53 #if str($options_type.single_copy_cond) == 'taxa': |
51 --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa | 54 --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa |
52 --taxa_present $options_type.single_copy_cond.taxa_present | 55 --taxa_present $options_type.single_copy_cond.taxa_present |
53 #end if | 56 #end if |
54 #if str($create_orthogroup) == 'yes': | 57 #if str($create_orthogroup) == 'yes': |
55 --orthogroup_fasta | 58 --orthogroup_fasta |
56 #if $create_corresponding_coding_sequences: | 59 #if $create_corresponding_coding_sequences: |
60 #set output_label = "Gene family clusters with corresponding coding sequences" | |
57 --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences" | 61 --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences" |
62 #else: | |
63 #set output_label = "Gene family clusters" | |
58 #end if | 64 #end if |
59 #end if | 65 #end if |
60 #end if | 66 #end if |
61 | |
62 --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" | |
63 --num_threads \${GALAXY_SLOTS:-4} | |
64 | 67 |
65 #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both': | 68 #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both': |
66 #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes': | 69 #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes': |
67 && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log | 70 && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log |
68 #else: | 71 #else: |
70 #end if | 73 #end if |
71 #end if | 74 #end if |
72 | 75 |
73 #if $create_ortho_sequences: | 76 #if $create_ortho_sequences: |
74 #if $create_corresponding_coding_sequences: | 77 #if $create_corresponding_coding_sequences: |
75 && echo "Sequences classified into precomputed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output | 78 && echo "# Precomputed orthologous gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output |
76 #else: | 79 #else: |
77 && echo "Sequences classified into precomputed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output | 80 && echo "# Precomputed orthologous gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output |
78 #end if | 81 #end if |
79 && ls -al $orthogroups_fasta_src_dir | grep f >> $output | 82 && ls -al $orthogroups_fasta_src_dir | grep f >> $output |
80 && mv $orthogroups_fasta_src_dir/* $dest_dir || true | 83 && mv $orthogroups_fasta_src_dir/* $dest_dir || true |
81 #end if | 84 #end if |
82 | 85 |
159 </inputs> | 162 </inputs> |
160 <outputs> | 163 <outputs> |
161 <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}"> | 164 <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}"> |
162 <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter> | 165 <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter> |
163 </data> | 166 </data> |
164 <data name="output" format="txt" label="Sequences classified into gene family clusters on ${on_string}"> | 167 <data name="output" format="txt" label="${output_label} on ${on_string}"> |
165 <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter> | 168 <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter> |
166 </data> | 169 </data> |
167 <collection name="transcripts" type="list"> | 170 <collection name="orthos" type="list"> |
168 <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" /> | 171 <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" /> |
169 </collection> | 172 </collection> |
170 </outputs> | 173 </outputs> |
171 <tests> | 174 <tests> |
172 <test> | 175 <test> |
173 <param name="input" value="" ftype="fasta" /> | 176 <!-- Not sure how to test this since the tool requires scaffolds data which is extremely large and installed using a Data Manager --> |
174 <param name="prediction_method" value="transdecoder"/> | 177 <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" /> |
175 <param name="target_gene_family_assembly" value="no"/> | 178 <param name="scaffold" value="22Gv1.1"/> |
176 <param name="strand_specific" value="yes"/> | 179 <param name="method" value="orthomcl"/> |
180 <param name="classifier" value="blastp"/> | |
177 <param name="dereplicate" value="yes"/> | 181 <param name="dereplicate" value="yes"/> |
178 <param name="min_length" value="200"/> | 182 <param name="min_length" value="200"/> |
179 <output_collection name="orthos" type="list"> | 183 <output_collection name="orthos" type="list"> |
180 | 184 <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/> |
185 <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/> | |
186 <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/> | |
181 </output_collection> | 187 </output_collection> |
182 </test> | 188 </test> |
183 </tests> | 189 </tests> |
184 <help> | 190 <help> |
185 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of | 191 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of |
186 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene | 192 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene |
187 sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive | 193 sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive |
188 to remote homologs) or both (more exhaustive). | 194 to remote homologs) or both (more exhaustive). |
189 | 195 |
190 This tool accepts any of the following as input. | 196 This tool accepts any of the following as input: |
191 | 197 |
192 * the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool | 198 * the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool |
193 * externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly | 199 * externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly |
194 * gene predictions from a sequenced genome | 200 * gene predictions from a sequenced genome |
195 | 201 |