Mercurial > repos > iuc > concoct
comparison concoct.xml @ 0:bf23c688912a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 49b42f61ff37c3c33dd15c195e5705e1db066c37"
| author | iuc |
|---|---|
| date | Fri, 18 Feb 2022 14:16:53 +0000 |
| parents | |
| children | 1f4286d836a3 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bf23c688912a |
|---|---|
| 1 <tool id="concoct" name="CONCOCT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>metagenome binning</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 #set pca_components_file_name = 'PCA_components_data_gt' + str($advanced.length_threshold) + '.csv' | |
| 9 #set pca_transformed_file_name = 'PCA_transformed_data_gt' + str($advanced.length_threshold) + '.csv' | |
| 10 #set clustering_file_name = 'clustering_gt' + str($advanced.length_threshold) + '.csv' | |
| 11 | |
| 12 ## CONCOCT doesn't handle gzipped files. | |
| 13 #if $composition_file.ext.endswith(".gz") | |
| 14 gunzip -c '$composition_file' > composition_file.fa && | |
| 15 #else: | |
| 16 ln -s '$composition_file' composition_file.fa && | |
| 17 #end if | |
| 18 | |
| 19 mkdir outdir && | |
| 20 concoct | |
| 21 --coverage_file '$coverage_file' | |
| 22 --composition_file composition_file.fa | |
| 23 --clusters $advanced.clusters | |
| 24 --kmer_length $advanced.kmer_length | |
| 25 --threads \${GALAXY_SLOTS:-4} | |
| 26 --length_threshold $advanced.length_threshold | |
| 27 --read_length $advanced.read_length | |
| 28 --total_percentage_pca $advanced.total_percentage_pca | |
| 29 --basename 'outdir/' | |
| 30 --seed $advanced.seed | |
| 31 --iterations $advanced.iterations | |
| 32 --epsilon $advanced.epsilon | |
| 33 $advanced.no_cov_normalization | |
| 34 $advanced.no_total_coverage | |
| 35 --no_original_data | |
| 36 $advanced.converge_out | |
| 37 | |
| 38 ## Convert all CONCOCT .csv outputs to tabular. | |
| 39 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_components_file_name > '$output_pca_components' | |
| 40 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_transformed_file_name > '$output_pca_transformed' | |
| 41 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$clustering_file_name > '$output_clustering' | |
| 42 #if str($advanced.output_process_log) == 'yes': | |
| 43 && mv outdir/log.txt '$process_log' | |
| 44 #end if | |
| 45 ]]></command> | |
| 46 <inputs> | |
| 47 <param argument="--coverage_file" type="data" format="tabular" label="Tabular coverage file" help="Columns correspond to samples and rows to contigs"/> | |
| 48 <param argument="--composition_file" type="data" format="fasta,fasta.gz" label="Fasta file" help="Used to calculate the kmer composition (the genomic signature) of each contig"/> | |
| 49 <section name="advanced" title="Advanced options"> | |
| 50 <param argument="--clusters" type="integer" value="400" label="Maximum number of clusters for the Variational Gaussian Mixture Model algorithm"/> | |
| 51 <param argument="--kmer_length" type="integer" value="4" label="Kmer length"/> | |
| 52 <param argument="--length_threshold" type="integer" value="1000" label="Sequence length threshold" help="Contigs shorter than this value will not be included"/> | |
| 53 <param argument="--read_length" type="integer" value="100" label="Read length for coverage"/> | |
| 54 <param argument="--total_percentage_pca" type="integer" value="100" label="Percentage of variance explained by the principal components for the combined data"/> | |
| 55 <param argument="--seed" type="integer" min="0" value="1" label="Integer to use as seed for clustering" help="Zero value will use random seed"/> | |
| 56 <param argument="--iterations" type="integer" value="500" label="Maximum number of iterations for the Variational Bayes Gaussian Mixture Models"/> | |
| 57 <param argument="--epsilon" type="float" value="0.000001" label="Epsilon for the Variational Gaussian Mixture Model algorithm"/> | |
| 58 <param argument="--no_cov_normalization" type="boolean" truevalue="--no_cov_normalization" falsevalue="" checked="false" label="Skip normalization and only do log transform of the coverage?" help="By default, the coverage is normalized for samples, then normalized for contigs and finally log transformed"/> | |
| 59 <param argument="--no_total_coverage" type="boolean" truevalue="--no_total_coverage" falsevalue="" checked="false" label="Eliminate the total coverage column from the coverage data matrix?" help="By default, total coverage is included, independently of coverage normalization but previous to log transformation"/> | |
| 60 <param argument="--converge_out" type="boolean" truevalue="--converge_out" falsevalue="" checked="false" label="Output convergence information?"/> | |
| 61 <param name="output_process_log" type="select" label="Output process log file?"> | |
| 62 <option value="no" selected="true">No</option> | |
| 63 <option value="yes">Yes</option> | |
| 64 </param> | |
| 65 </section> | |
| 66 </inputs> | |
| 67 <outputs> | |
| 68 <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)"> | |
| 69 <filter>advanced['output_process_log'] == 'yes'</filter> | |
| 70 </data> | |
| 71 <data name="output_pca_components" format="tabular" label="${tool.name} on ${on_string} (PCA components)"/> | |
| 72 <data name="output_pca_transformed" format="tabular" label="${tool.name} on ${on_string} (PCA transformed)"/> | |
| 73 <data name="output_clustering" format="tabular" label="${tool.name} on ${on_string} (Clusters)"/> | |
| 74 </outputs> | |
| 75 <tests> | |
| 76 <test expect_num_outputs="4"> | |
| 77 <param name="coverage_file" value="input1.tabular" ftype="tabular"/> | |
| 78 <param name="composition_file" value="input1.fa.gz" ftype="fasta.gz"/> | |
| 79 <param name="output_process_log" value="yes"/> | |
| 80 <output name="process_log" file="process_log.txt" ftype="txt" compare="contains"/> | |
| 81 <output name="output_pca_components" ftype="tabular"> | |
| 82 <assert_contents> | |
| 83 <has_size value="367636"/> | |
| 84 <has_text text="7377051e-02"/> | |
| 85 </assert_contents> | |
| 86 </output> | |
| 87 <output name="output_pca_transformed" ftype="tabular"> | |
| 88 <assert_contents> | |
| 89 <has_size value="737926"/> | |
| 90 <has_text text="NODE_103_length_20202_cov_8.395357.0"/> | |
| 91 </assert_contents> | |
| 92 </output> | |
| 93 <output name="output_clustering" ftype="tabular"> | |
| 94 <assert_contents> | |
| 95 <has_size value="12167"/> | |
| 96 <has_text text="NODE_103_length_20202_cov_8.395357"/> | |
| 97 </assert_contents> | |
| 98 </output> | |
| 99 </test> | |
| 100 </tests> | |
| 101 <help><![CDATA[ | |
| 102 **What it does** | |
| 103 | |
| 104 CONCOCT (Clustering cONtigs with COverage and ComposiTion) performs unsupervised binning of metagenomic contigs by | |
| 105 using nucleotide composition - kmer frequencies - and coverage data for multiple samples. CONCOCT can accurately | |
| 106 (up to species level) bin metagenomic contigs. | |
| 107 | |
| 108 The tool accepts 2 inputs: a tabular file where each row corresponds to a contig and each column corresponds to a | |
| 109 sample (the values are the average coverage for this contig in that sample) and a file containing sequences in | |
| 110 fasta format. | |
| 111 | |
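| 112 Three outputs are produced: the clustering of the contigs that pass the sequence length threshold (1000 by default), the PCA transformed matrix and the PCA components. A process log can optionally be produced as well. | |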
| 113 | |
| 114 ]]></help> | |
| 115 <expand macro="citations"/> | |
| 116 </tool> |
