Mercurial > repos > iuc > maaslin3
view maaslin3.xml @ 0:5f6edb762a2f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/maaslin3 commit 2646923cefbef904855fd8716f975e86df216c09
| author | iuc |
|---|---|
| date | Thu, 22 Jan 2026 09:52:09 +0000 |
| parents | |
| children |
line wrap: on
line source
<!--
    Galaxy wrapper for MaAsLin3.

    The command section resolves the selected metadata column indices to their
    header names with awk, links the input datasets to fixed file names in the
    working directory, and then calls maaslin3.R with the options chosen in the
    "inputs" section. Results are collected from the 'output' directory.
-->
<tool id="maaslin3" name="MaAsLin3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Microbiome Multivariable Association with Linear Models</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        mkdir 'output' &&

        ## Each "effects" parameter is a data_column selection. The loops below
        ## turn the selected column numbers into awk field references and print
        ## those fields of the first (header) line, comma separated, into a
        ## shell variable that is handed to maaslin3.R further down.
        #if $fixed_effects:
            #set idx = []
            #for $i in $fixed_effects:
                #silent idx.append(f'${i}')
            #end for
            #set idx_for_awk = ','.join(idx)
            fixed_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk }' "$input_metadata"` &&
            echo 'Assigned fixed effects as:' \$fixed_effects &&
        #end if

        #if $random_effects:
            #set idx = []
            #for $i in $random_effects:
                #silent idx.append(f'${i}')
            #end for
            #set idx_for_awk = ','.join(idx)
            random_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk }' "$input_metadata"` &&
            echo 'Assigned random effects as:' \$random_effects &&
        #end if

        #if $group_effects:
            #set idx = []
            #for $i in $group_effects:
                #silent idx.append(f'${i}')
            #end for
            #set idx_for_awk = ','.join(idx)
            group_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk }' "$input_metadata"` &&
            echo 'Assigned group effects as:' \$group_effects &&
        #end if

        #if $ordered_effects:
            #set idx = []
            #for $i in $ordered_effects:
                #silent idx.append(f'${i}')
            #end for
            #set idx_for_awk = ','.join(idx)
            ordered_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk }' "$input_metadata"` &&
            echo 'Assigned ordered effects as:' \$ordered_effects &&
        #end if

        ## Single column selection: the escaped dollar sign makes awk see a
        ## field reference instead of a bare number.
        #if $strata_effects:
            strata_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print \$${strata_effects} }' "$input_metadata"` &&
            echo 'Assigned strata effects as:' \$strata_effects &&
        #end if

        ## The shell variable name must not contain a dot, so it is named after
        ## the parameter only (without the section prefix).
        #if $covariate.feature_specific_covariate and $covariate.feature_specific_covariate_name:
            #set idx = []
            #for $i in $covariate.feature_specific_covariate_name:
                #silent idx.append(f'${i}')
            #end for
            #set idx_for_awk = ','.join(idx)
            feature_specific_covariate_name=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk }' "$covariate.feature_specific_covariate"` &&
            echo 'Assigned covariate feature specific covariate name as:' \$feature_specific_covariate_name &&
        #end if

        ## Join all "variable,reference" pairs of the repeat with ';'.
        #if len($ref) != 0:
            #set ref_dict = []
            #for $i, $r in enumerate($ref):
                #silent $ref_dict.append('%s' %(str($r.reference)))
            #end for
            #set final_ref = ';'.join(ref_dict)
        #end if

        ## Link the inputs to fixed names in the working directory.
        ln -s '$input_data' 'input_data.tsv' &&
        ln -s '$input_metadata' 'input_metadata.tsv' &&
        #if $options.unscaled_abundance:
            ln -s '$options.unscaled_abundance' 'unscaled_abundance.tsv' &&
        #end if
        #if $covariate.feature_specific_covariate:
            ln -s '$covariate.feature_specific_covariate' 'feature_specific_covariate.tsv' &&
        #end if

        maaslin3.R
            'input_data.tsv'
            'input_metadata.tsv'
            'output'
            #if $formula:
                --formula '~ $formula'
            #end if
            #if $fixed_effects:
                --fixed_effects \$fixed_effects
            #end if
            #if len($ref) != 0:
                --reference '$final_ref'
            #end if
            #if $random_effects:
                --random_effects \$random_effects
            #end if
            #if $group_effects:
                --group_effects \$group_effects
            #end if
            #if $ordered_effects:
                --ordered_effects \$ordered_effects
            #end if
            #if $strata_effects:
                --strata_effects \$strata_effects
            #end if

            ## Numeric options are only passed when they are non-zero.
            #if $options.min_abundance:
                --min_abundance ${options.min_abundance}
            #end if
            #if $options.min_prevalence:
                --min_prevalence ${options.min_prevalence}
            #end if
            #if $options.zero_threshold:
                --zero-threshold ${options.zero_threshold}
            #end if
            #if $options.min_variance:
                --min_variance ${options.min_variance}
            #end if
            #if $options.max_significance:
                --max_significance $options.max_significance
            #end if
            --normalization '$options.normalization'
            --transform '$options.transform'
            --correction '$options.correction'
            ${options.standardize}
            #if $options.unscaled_abundance:
                --unscaled_abundance 'unscaled_abundance.tsv'
            #end if
            ${options.median_comparison_abundance}
            ${options.median_comparison_prevalence}
            #if $options.median_comparison_abundance_threshold:
                --median_comparison_abundance_threshold ${options.median_comparison_abundance_threshold}
            #end if
            #if $options.median_comparison_prevalence_threshold:
                --median_comparison_prevalence_threshold ${options.median_comparison_prevalence_threshold}
            #end if
            ${options.subtract_median}
            ${options.warn_prevalence}
            ${options.small_random_effects}
            ${options.augment}
            #if $options.evaluate_only != 'NULL':
                --evaluate_only '$options.evaluate_only'
            #end if

            #if $covariate.feature_specific_covariate:
                --feature_specific_covariate 'feature_specific_covariate.tsv'
            #end if
            #if $covariate.feature_specific_covariate and $covariate.feature_specific_covariate_name:
                --feature_specific_covariate_name \$feature_specific_covariate_name
            #end if
            #if $covariate.feature_specific_covariate and $covariate.feature_specific_covariate_record:
                --feature_specific_covariate_record '$covariate.feature_specific_covariate_record'
            #end if

            ${output.plot_summary_plot}
            #if $output.summary_plot_first_n:
                --summary_plot_first_n ${output.summary_plot_first_n}
            #end if
            #if $output.coef_plot_vars:
                --coef_plot_vars '$output.coef_plot_vars'
            #end if
            #if $output.heatmap_vars:
                --heatmap_vars '$output.heatmap_vars'
            #end if
            ${output.plot_associations}
            #if $output.max_pngs:
                --max_pngs ${output.max_pngs}
            #end if
            ${output.save_models}
            ${output.save_plots_rds}
            ${output.summary_plot_balanced}
            --cores "\${GALAXY_SLOTS:-1}"
    ]]></command>
    <inputs>
        <param name="input_data" type="data" format="tabular" label="Data file"/>
        <param name="input_metadata" type="data" format="tabular" label="Metadata file"/>
        <param argument="--formula" type="text" value="" label="Input formula (use header of columns for this)" help="Enter the formula the model should use. NOTE: separate the column names with a plus sign surrounded by spaces, for example: 'header1 + header2 + ...'. When no input is given all columns will be used"/>
        <param argument="--fixed_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="Set fixed effects" help="Select the fixed effects for the model. DEFAULT: ALL"/>
        <repeat name="ref" title="Reference pairs">
            <param argument="--reference" type="text" label="Reference" help="The factor to use as a reference for a variable with more than two levels provided as a string of 'variable,reference'. DEFAULT: NA"/>
        </repeat>
        <param argument="--random_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="Set random effects" help="Select the random effects for the model. DEFAULT: NONE"/>
        <param argument="--group_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="Set group effects" help="Select group effects for the model. DEFAULT: NONE"/>
        <param argument="--ordered_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="Set ordered effects" help="Select the ordered effects for the model. DEFAULT: NONE"/>
        <param argument="--strata_effects" type="data_column" data_ref="input_metadata" use_header_names="true" optional="true" label="Set strata effects" help="Select only one strata effect for the model. DEFAULT: NONE"/>
        <section name="options" title="Additional options" expanded="true">
            <!-- Thresholds are floats so that fractional values (e.g. a prevalence of 0.1) can be entered. -->
            <param argument="--min_abundance" type="float" value="0" label="Minimum abundance" help="Minimum abundance for each feature before normalization and transformation"/>
            <param argument="--min_prevalence" type="float" value="0" label="Minimum prevalence" help="The minimum proportion of samples for which a feature is detected at minimum abundance"/>
            <param argument="--zero-threshold" type="float" value="0" label="Zero threshold" help="The minimum abundance to be considered non-zero"/>
            <param argument="--min_variance" type="float" value="0" label="Minimum variance" help="Keep features with variances greater than value"/>
            <param argument="--max_significance" type="float" value="0.1" label="Maximum significance" help="The q-value threshold for significance"/>
            <param argument="--normalization" type="select" label="Normalization method">
                <option value="TSS" selected="true">TSS</option>
                <option value="CLR">CLR</option>
                <option value="NONE">NONE</option>
            </param>
            <param argument="--transform" type="select" label="Transform to apply">
                <option value="LOG" selected="true">LOG</option>
                <option value="PLOG">PLOG</option>
                <option value="NONE">NONE</option>
            </param>
            <param argument="--correction" type="select" label="Correction method for computing the q-value">
                <option value="BH" selected="true">BH</option>
                <option value="BY">BY</option>
                <option value="bonferroni">bonferroni</option>
                <option value="holm">holm</option>
                <option value="hochberg">hochberg</option>
                <option value="hommel">hommel</option>
                <option value="fdr">fdr</option>
                <option value="none">none</option>
            </param>
            <param argument="--standardize" type="boolean" truevalue="--standardize TRUE" falsevalue="" checked="true" label="Standardize" help="Scale continuous metadata with the z-score (converting the interpretation of its scale to a standardized scale)"/>
            <param argument="--unscaled_abundance" type="data" format="tabular" optional="true" label="Unscaled abundance reference table" help="The table to use as an unscaled abundance reference. Example in the help section. NOTE: the single column name must be the same as one of the features or 'total'!"/>
            <param argument="--median_comparison_abundance" type="boolean" truevalue="--median_comparison_abundance TRUE" falsevalue="" checked="true" label="Median comparison abundance" help="Test abundance coefficients against the median association"/>
            <param argument="--median_comparison_prevalence" type="boolean" truevalue="--median_comparison_prevalence TRUE" falsevalue="--median_comparison_prevalence FALSE" checked="false" label="Median comparison prevalence" help="Test prevalence coefficients against the median association"/>
            <param argument="--median_comparison_abundance_threshold" type="float" value="0" label="Median comparison abundance threshold" help="Radius within which the median adjustment gives a p-value of 1"/>
            <param argument="--median_comparison_prevalence_threshold" type="float" value="0" label="Median comparison prevalence threshold" help="Radius within which the median adjustment gives a p-value of 1"/>
            <param argument="--subtract_median" type="boolean" truevalue="--subtract_median TRUE" falsevalue="--subtract_median FALSE" checked="false" label="Subtract median" help="Subtract the median from coefficients when doing median comparisons"/>
            <param argument="--warn_prevalence" type="boolean" truevalue="--warn_prevalence TRUE" falsevalue="" checked="true" label="Warn prevalence" help="Check and warn if prevalence associations are likely due to compositionality"/>
            <param argument="--small_random_effects" type="boolean" truevalue="--small_random_effects TRUE" falsevalue="--small_random_effects FALSE" checked="false" label="Small random effects" help="Replace prevalence random effects with fixed effects because groups are small"/>
            <param argument="--augment" type="boolean" truevalue="--augment TRUE" falsevalue="--augment FALSE" checked="true" label="Augment" help="Add weighted extra 0s and 1s to avoid linear separability"/>
            <!-- The value 'NULL' means the option is not passed to maaslin3.R at all. -->
            <param argument="--evaluate_only" type="select" label="Evaluate only" help="Whether to evaluate just the abundance or prevalence models">
                <option value="NULL" selected="true">NULL</option>
                <option value="none">None</option>
                <option value="abundance">abundance</option>
                <option value="prevalence">prevalence</option>
            </param>
        </section>
        <section name="covariate" title="Feature specific covariate">
            <param argument="--feature_specific_covariate" type="data" format="tabular" optional="true" label="Feature-specific covariates table" help="The table to use for feature-specific covariates. Row and column names should match the data input. EXAMPLE IN THE HELP SECTION DOWN BELOW!"/>
            <param argument="--feature_specific_covariate_name" type="data_column" data_ref="feature_specific_covariate" use_header_names="true" multiple="true" optional="true" label="Set feature-specific covariate" help="Select the feature-specific covariate"/>
            <param argument="--feature_specific_covariate_record" type="text" optional="true" label="Set feature_specific_covariate data" help="Select which data (rows) should be included for the feature-specific covariate"/>
        </section>
        <section name="output" title="Output options">
            <param argument="--plot_summary_plot" type="boolean" truevalue="--plot_summary_plot TRUE" falsevalue="--plot_summary_plot FALSE" checked="true" label="Plot summary plot"/>
            <param argument="--summary_plot_first_n" type="integer" value="25" label="Summary plot first n" help="In summary plot, plot top N features with significant associations"/>
            <param argument="--coef_plot_vars" type="text" label="Coefficient plot variables" help="The variables to use in the coefficient plot section of the summary plot provided as a comma-separated string. Continuous variables should match the metadata column name, and categorical variables should be of the form: [metadata] [level]. DEFAULT: NA"/>
            <param argument="--heatmap_vars" type="text" label="Heatmap variables" help="The variables to use in the heatmap section of the summary plot provided as a comma-separated string. Continuous variables should match the metadata column name, and categorical variables should be of the form: [metadata] [level]. DEFAULT: NA"/>
            <param argument="--plot_associations" type="boolean" truevalue="--plot_associations TRUE" falsevalue="--plot_associations FALSE" checked="true" label="Plot associations"/>
            <param argument="--max_pngs" type="integer" value="30" label="Maximum number of association plots"/>
            <param argument="--save_models" type="boolean" truevalue="--save_models TRUE" falsevalue="--save_models FALSE" checked="false" label="Save model" help="Return the full model outputs and save to an RData file"/>
            <param argument="--save_plots_rds" type="boolean" truevalue="--save_plots_rds TRUE" falsevalue="--save_plots_rds FALSE" checked="false" label="Save plots as rds" help="Save the plots to RDS files"/>
            <param argument="--summary_plot_balanced" type="boolean" truevalue="--summary_plot_balanced TRUE" falsevalue="--summary_plot_balanced FALSE" checked="false" label="Summary plot balanced" help="If coef_plot_vars is selected this will select balanced top features"/>
        </section>
    </inputs>
    <outputs>
        <data name="data_norm" format="tabular" from_work_dir="output/features/data_norm.tsv" label="${tool.name} on ${on_string}: DATA NORMALIZED"/>
        <data name="data_trans" format="tabular" from_work_dir="output/features/data_transformed.tsv" label="${tool.name} on ${on_string}: DATA TRANSFORMED"/>
        <data name="data_filter" format="tabular" from_work_dir="output/features/filtered_data.tsv" label="${tool.name} on ${on_string}: FILTERED DATA"/>
        <collection name="rdata" type="list" label="${tool.name} on ${on_string}: R DATA">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="output/fits" format="rdata"/>
        </collection>
        <collection name="plots" type="list:list" label="${tool.name} on ${on_string}: PLOTS">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.png" directory="output/figures/association_plots" recurse="true" format="png"/>
            <filter>output['plot_associations'] is True</filter>
        </collection>
        <data name="summary_png" format="png" from_work_dir="output/figures/summary_plot.png" label="${tool.name} on ${on_string}: SUMMARY PLOT (PNG)">
            <filter>output['summary_plot_balanced'] is False</filter>
        </data>
        <data name="summary_pdf" format="pdf" from_work_dir="output/figures/summary_plot.pdf" label="${tool.name} on ${on_string}: SUMMARY PLOT (PDF)">
            <filter>output['summary_plot_balanced'] is False</filter>
        </data>
        <collection name="plots_r" type="list" label="${tool.name} on ${on_string}: PLOTS (R DATA)">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.RDS" directory="output/figures" recurse="true" format="rdata"/>
            <filter>output['plot_associations'] is True and output['save_plots_rds'] is True</filter>
        </collection>
        <data name="result_all" format="tabular" from_work_dir="output/all_results.tsv" label="${tool.name} on ${on_string}: ALL RESULT"/>
        <data name="result_sig" format="tabular" from_work_dir="output/significant_results.tsv" label="${tool.name} on ${on_string}: SIGNIFICANT RESULT"/>
    </outputs>
    <tests>
        <!-- Test 1: formula with two reference pairs, all other options at their defaults. -->
        <test expect_num_outputs="9">
            <param name="input_data" value="HMP2_taxonomy.tsv" ftype="tabular"/>
            <param name="input_metadata" value="HMP2_metadata.tsv" ftype="tabular"/>
            <param name="formula" value="reads + diagnosis + dysbiosis_state + antibiotics + age"/>
            <repeat name="ref">
                <param name="reference" value="diagnosis,CD"/>
            </repeat>
            <repeat name="ref">
                <param name="reference" value="dysbiosis_state,dysbiosis_UC"/>
            </repeat>
            <output name="data_norm" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output name="data_trans" file="data_transformed.tsv" ftype="tabular"/>
            <output name="data_filter" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output_collection name="rdata" type="list">
                <element name="fitted_linear.rds" ftype="rdata">
                    <assert_contents>
                        <has_size value="327196" delta="5000"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="summary_png" ftype="png">
                <assert_contents>
                    <has_image_height height="3690" delta="20"/>
                    <has_image_width width="4375" delta="20"/>
                </assert_contents>
            </output>
            <output name="summary_pdf" ftype="pdf">
                <assert_contents>
                    <has_size size="18000" delta="2000"/>
                </assert_contents>
            </output>
            <output name="result_all" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="2036" delta="30"/>
                </assert_contents>
            </output>
            <output name="result_sig" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="834" delta="15"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: summary_plot_balanced enabled, which filters out both summary plot outputs. -->
        <test expect_num_outputs="7">
            <param name="input_data" value="HMP2_taxonomy.tsv" ftype="tabular"/>
            <param name="input_metadata" value="HMP2_metadata.tsv" ftype="tabular"/>
            <param name="formula" value="reads + diagnosis + dysbiosis_state + antibiotics + age"/>
            <repeat name="ref">
                <param name="reference" value="diagnosis,CD"/>
            </repeat>
            <repeat name="ref">
                <param name="reference" value="dysbiosis_state,dysbiosis_UC"/>
            </repeat>
            <section name="output">
                <param name="summary_plot_balanced" value="True"/>
            </section>
            <output name="data_norm" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output name="data_trans" file="data_transformed.tsv" ftype="tabular"/>
            <output name="data_filter" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output_collection name="rdata" type="list">
                <element name="fitted_linear.rds" ftype="rdata">
                    <assert_contents>
                        <has_size value="327196" delta="5000"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="result_all" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="2036" delta="30"/>
                </assert_contents>
            </output>
            <output name="result_sig" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="834" delta="15"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: save_plots_rds enabled, which adds the plots_r collection. -->
        <test expect_num_outputs="10">
            <param name="input_data" value="HMP2_taxonomy.tsv" ftype="tabular"/>
            <param name="input_metadata" value="HMP2_metadata.tsv" ftype="tabular"/>
            <param name="formula" value="reads + diagnosis + dysbiosis_state + antibiotics + age"/>
            <repeat name="ref">
                <param name="reference" value="diagnosis,CD"/>
            </repeat>
            <repeat name="ref">
                <param name="reference" value="dysbiosis_state,dysbiosis_UC"/>
            </repeat>
            <section name="output">
                <param name="save_plots_rds" value="True"/>
            </section>
            <output name="data_norm" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output name="data_trans" file="data_transformed.tsv" ftype="tabular"/>
            <output name="data_filter" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output_collection name="rdata" type="list">
                <element name="fitted_linear.rds" ftype="rdata">
                    <assert_contents>
                        <has_size value="327196" delta="5000"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="summary_png" ftype="png">
                <assert_contents>
                    <has_image_height height="3690" delta="20"/>
                    <has_image_width width="4375" delta="20"/>
                </assert_contents>
            </output>
            <output name="summary_pdf" ftype="pdf">
                <assert_contents>
                    <has_size size="18000" delta="2000"/>
                </assert_contents>
            </output>
            <output name="result_all" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="2036" delta="30"/>
                </assert_contents>
            </output>
            <output name="result_sig" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="834" delta="15"/>
                </assert_contents>
            </output>
            <output_collection name="plots_r" type="list" count="4">
                <element name="summary_plot_gg" ftype="rdata">
                    <assert_contents>
                        <has_size size="720000" delta="100000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 4: save_models enabled and plot_associations disabled, which filters out the plots collection. -->
        <test expect_num_outputs="8">
            <param name="input_data" value="HMP2_taxonomy.tsv" ftype="tabular"/>
            <param name="input_metadata" value="HMP2_metadata.tsv" ftype="tabular"/>
            <param name="formula" value="reads + diagnosis + dysbiosis_state + antibiotics + age"/>
            <repeat name="ref">
                <param name="reference" value="diagnosis,CD"/>
            </repeat>
            <repeat name="ref">
                <param name="reference" value="dysbiosis_state,dysbiosis_UC"/>
            </repeat>
            <section name="output">
                <param name="save_models" value="True"/>
                <param name="plot_associations" value="False"/>
            </section>
            <output name="data_norm" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output name="data_trans" file="data_transformed.tsv" ftype="tabular"/>
            <output name="data_filter" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="818" delta="10"/>
                </assert_contents>
            </output>
            <output_collection name="rdata" type="list">
                <element name="fitted_linear.rds" ftype="rdata">
                    <assert_contents>
                        <has_size value="327196" delta="5000"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="summary_png" ftype="png">
                <assert_contents>
                    <has_image_height height="3690" delta="20"/>
                    <has_image_width width="4375" delta="20"/>
                </assert_contents>
            </output>
            <output name="summary_pdf" ftype="pdf">
                <assert_contents>
                    <has_size size="18000" delta="2000"/>
                </assert_contents>
            </output>
            <output name="result_all" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="2036" delta="30"/>
                </assert_contents>
            </output>
            <output name="result_sig" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="834" delta="15"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**IMPORTANT**

random_effects, group_effects, ordered_effects, and strata_effects must be NULL when formula is not NULL!!

If warn_prevalence (Warn prevalence) is set to True please set evaluate_only (Evaluate only) to NULL.
If you want to use evaluate_only (Evaluate only) please do not set warn_prevalence (Warn prevalence) to True!
Both arguments cannot be set at the same time!

**Input**

- A data table
- A metadata table
- An unscaled abundance reference table (optional)

  Example for the non-specific case::

      SampleID    total
      S1          2.3e8
      S2          1.8e8
      S3          3.1e8
      S4          2.6e8

  Example for a specific case with *SpikeIn1*::

      SampleID    SpikeIn1
      S1          1000
      S2          2000
      S3          1500
      S4          1800

- A feature-specific covariates table (optional)

  Example::

      Feature     GC_content    Batch_flag
      GeneA       0.42          1
      GeneB       0.65          0
      PathwayX    NA            1
      PathwayY    0.58          1

**Output**

- Multiple plots (optional)
- Plots in R format (optional)
- Certain R format data files (some optional)
- Multiple data tables
    ]]></help>
    <citations>
        <citation type="doi">10.1101/2024.12.13.628459</citation>
    </citations>
</tool>
