comparison ppanggolin_rarefaction.xml @ 0:d848a49b3303 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ppanggolin commit 6f13ba99c86ba17b6a83baedf328e04190cec247
author iuc
date Tue, 16 Sep 2025 13:10:58 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d848a49b3303
1 <tool id="ppanggolin_rarefaction" name="PPanGGOLiN rarefaction" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
2 <description>computes the rarefaction curve of the pangenome</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8
9 <command detect_errors="exit_code"><![CDATA[
10 mkdir -p ./tmp_ppanggolin/rarefaction &&
11 mkdir -p ./tmp_ppanggolin/tmpdir_rarefaction &&
12 ## ppanggolin writes into a job-local scratch directory; the requested files are copied to the Galaxy datasets at the end.
13 ppanggolin rarefaction
14 --pangenome '$pangenome_h5'
15 --output ./tmp_ppanggolin/rarefaction
16 --tmpdir ./tmp_ppanggolin/tmpdir_rarefaction
17 --force
18 --cpu "\${GALAXY_SLOTS:-4}"
19 --disable_prog_bar
20
21 --depth $depth
22 --min $min
23 --max $max
24
25 #if str($nb_of_partitions) != "":
26 --nb_of_partitions $nb_of_partitions
27 #end if
28 ## The output selector is optional: check that something is selected before testing membership (same guard as the output filters).
29 #if $advanced_pangenome_optional_files and "output_rarefaction_csv" in $advanced_pangenome_optional_files:
30 && cat ./tmp_ppanggolin/rarefaction/rarefaction.csv > '${rarefaction_csv}'
31 #end if
32 #if $advanced_pangenome_optional_files and "output_rarefaction_parameters_csv" in $advanced_pangenome_optional_files:
33 && cat ./tmp_ppanggolin/rarefaction/rarefaction_parameters.csv > '${rarefaction_parameters_csv}'
34 #end if
35 && cat ./tmp_ppanggolin/rarefaction/rarefaction_curve.html > '${rarefaction_curve_html}'
36
37 ]]></command>
38
39 <inputs>
40
41 <expand macro="inputs_pangenome"/>
42
43 <param argument="--depth" type="integer" value="30" min="1" max="100" label="The number of sampling for each genome" help="Default=30 ; min=1 ; max=100. Warning: if this value is greater than 30, the computation will be VERY intensive and it will take a long time.">
44 </param>
45
46 <param argument="--min" type="integer" value="1" min="1" max="499" label="The minimal number of genomes in a sample" help="Default=1 ; min=1 ; max=499. The min value must be lower than the max value.">
47 </param>
48
49 <param argument="--max" type="integer" value="100" min="1" max="500" label="The maximal number of genomes in a sample" help="Default=100 ; min=1 ; max=500. The min value must be lower than the max value. Warning: if this value is greater than 100, the computation will be VERY intensive and it will take a long time.">
50 </param>
51
52 <expand macro="inputs_nb_of_partitions"/>
53
54 <param name="advanced_pangenome_optional_files" type="select" label="Add the following output files in the Galaxy history" multiple="true" optional="true" display="checkboxes" >
55 <option value="output_rarefaction_csv" selected="true">Rarefaction data in tabular format (csv)</option>
56 <option value="output_rarefaction_parameters_csv" selected="true">Rarefaction parameters in tabular format (csv)</option>
57 </param>
58
59 </inputs>
60
61 <outputs>
62 <!-- Optional CSV outputs: each one is created only when its option is selected in advanced_pangenome_optional_files. -->
63 <data name="rarefaction_csv" format="csv" label="PPanGGOLiN rarefaction on ${on_string}: Rarefaction data (csv)" >
64 <filter>advanced_pangenome_optional_files and "output_rarefaction_csv" in advanced_pangenome_optional_files</filter>
65 </data>
66 <data name="rarefaction_parameters_csv" format="csv" label="PPanGGOLiN rarefaction on ${on_string}: Rarefaction parameters (csv)" >
67 <filter>advanced_pangenome_optional_files and "output_rarefaction_parameters_csv" in advanced_pangenome_optional_files</filter>
68 </data>
69 <!-- The HTML rarefaction curve has no filter, so it is always produced. -->
70 <data name="rarefaction_curve_html" format="html" label="PPanGGOLiN rarefaction on ${on_string}: Rarefaction curve" />
71
72 </outputs>
73
74 <tests>
75 <test expect_num_outputs="3">
76 <param name="pangenome_h5" value="h5/test_data.h5" ftype="h5"/>
77 <param name="depth" value="30"/>
78 <param name="min" value="1"/>
79 <param name="max" value="100"/>
80 <output name="rarefaction_csv" >
81 <assert_contents>
82 <has_size value="4045" delta="100"/>
83 </assert_contents>
84 </output>
85 <output name="rarefaction_parameters_csv" >
86 <assert_contents>
87 <has_size value="324" delta="50"/>
88 </assert_contents>
89 </output>
90 <output name="rarefaction_curve_html" >
91 <assert_contents>
92 <has_size value="4575692" delta="100"/>
93 </assert_contents>
94 </output>
95 </test>
96 </tests>
97
98 <help><![CDATA[
99
100 PPanGGOLiN_ (Gautreau et al. 2020) is a software suite used to create and manipulate prokaryotic pangenomes from a set of either assembled
101 genomic DNA sequences or provided genome annotations. PPanGGOLiN builds pangenomes through a graphical model and a statistical method to partition gene
102 families in persistent, shell and cloud genomes. It integrates both information on protein-coding genes and their genomic neighborhood to build a graph
103 of gene families where each node is a gene family, and each edge is a relation of genetic contiguity.
104
105 The `ppanggolin rarefaction` command generates a rarefaction curve. It represents the evolution of the number of gene families for each partition as you add more genomes to the pangenome. It has been widely used in the literature as an indicator of the diversity that you are missing with your dataset on your taxonomic group (Tettelin et al., 2005). The idea is that if, at some point, adding more genomes to your pangenome no longer adds any gene families, you might have access to your entire taxonomic group’s diversity. On the contrary, if you are still adding a lot of genes, you may still be missing a lot of gene families.
106
107 There are 8 partitions represented. For each of the partitions, there are multiple representations of the observed data. You can find the observed means, medians, 1st and 3rd quartiles of the number of gene families per number of genomes used. You can also find the best fit of the data to Heaps’ law, which is usually used to represent this evolution of the diversity in terms of gene families in each of the partitions. See the PPanGGOLiN documentation_ for more details.
108
109 .. _PPanGGOLiN: https://github.com/labgem/PPanGGOLiN
110 .. _documentation: https://ppanggolin.readthedocs.io/en/latest/user/PangenomeAnalyses/pangenomeAnalyses.html#rarefaction-curve
111
112 ]]></help>
113
114 <expand macro="citation"/>
115
116 </tool>
117