comparison sculpt_sequences.xml @ 1:a0cd867780ec draft default tip

planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6b6ce806b5d016b3c7f20318180eff2dbe64395a-dirty
author tduigou
date Thu, 17 Jul 2025 13:24:49 +0000
parents 0c7f75a2338b
children
comparison
equal deleted inserted replaced
0:0c7f75a2338b 1:a0cd867780ec
16 <requirement type="package" version="0.3.9">pdf-reports</requirement> 16 <requirement type="package" version="0.3.9">pdf-reports</requirement>
17 <requirement type="package" version="0.1.8">sequenticon</requirement> 17 <requirement type="package" version="0.1.8">sequenticon</requirement>
18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement> 18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement>
19 </requirements> 19 </requirements>
20 <command detect_errors="exit_code"><![CDATA[ 20 <command detect_errors="exit_code"><![CDATA[
21 #set avoid_list = [line.strip() for line in str($avoid_patterns).strip().split('\n') if line.strip()] 21 #if str($json_use.use_json_param) == "false":
22 #set avoid_patterns = ','.join($avoid_list) 22 #set avoid_list = [line.strip() for line in str($avoid_patterns).strip().split('\n') if line.strip()]
23 23 #set avoid_patterns = ','.join($avoid_list)
24 #set hairpin_lines = [line.strip() for line in str($adv.hairpin_constraints).strip().split('\n') if line.strip()] 24
25 #set hairpin_constraints = '__cn__'.join($hairpin_lines) 25 #set hairpin_lines = [line.strip() for line in str($json_use.hairpin_constraints).strip().split('\n') if line.strip()]
26 26 #set hairpin_constraints = '__cn__'.join($hairpin_lines)
27 #set gc_lines = [line.strip() for line in str($adv.gc_constraints).strip().split('\n') if line.strip()] 27
28 #set gc_constraints = '__cn__'.join($gc_lines) 28 #set gc_lines = [line.strip() for line in str($json_use.gc_constraints).strip().split('\n') if line.strip()]
29 #set gc_constraints = '__cn__'.join($gc_lines)
30
31 #set kmer_size = $json_use.kmer_size
32
33 #else:
34 #set avoid_patterns = ''
35 #set hairpin_constraints = ''
36 #set gc_constraints = ''
37 #set kmer_size = ''
38 #end if
29 39
30 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files]) 40 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
31 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files]) 41 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files])
32 42
33 mkdir 'outdir_scul' && 43 mkdir 'outdir_scul' &&
34 mkdir 'outdir_unscul' && 44 mkdir 'outdir_unscul' &&
35 python '$__tool_directory__/sculpt_sequences.py' 45 python '$__tool_directory__/sculpt_sequences.py'
46 --use_json_param '$json_use.use_json_param'
36 --files_to_sculpt '$genbank_file_paths' 47 --files_to_sculpt '$genbank_file_paths'
37 --file_name_mapping '$file_name_mapping' 48 --file_name_mapping '$file_name_mapping'
38 --outdir_scul 'outdir_scul' 49 --outdir_scul 'outdir_scul'
39 --outdir_unscul 'outdir_unscul' 50 --outdir_unscul 'outdir_unscul'
40 --use_file_names_as_id '$adv.use_file_names_as_ids' 51 --use_file_names_as_id '$use_file_names_as_ids'
41 --avoid_patterns '$avoid_patterns' 52 --avoid_patterns '$avoid_patterns'
42 --gc_constraints '$adv.gc_constraints' 53 --gc_constraints '$gc_constraints'
43 --DnaOptimizationProblemClass '$DnaOptimizationProblemClass' 54 --kmer_size '$kmer_size'
44 --kmer_size '$adv.kmer_size' 55 --hairpin_constraints '$hairpin_constraints'
45 --hairpin_constraints '$adv.hairpin_constraints' 56 #if $json_use.use_json_param:
57 --json_params '$json_use.json_params'
58 #else:
59 --json_params ''
60 --DnaOptimizationProblemClass '$json_use.DnaOptimizationProblemClass'
61 #end if
46 ]]></command> 62 ]]></command>
47 <inputs> 63 <inputs>
48 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/> 64 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/>
49 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class"> 65 <conditional name='json_use'>
50 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option> 66 <param name="use_json_param" type="boolean" checked="false" label="Use parameter from a JSON file" />
51 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option> 67 <when value="false">
52 </param> 68 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class">
53 <param name="avoid_patterns" type="text" area="true" label="Pattern to Avoid" helps="Each pattern on a line" /> 69 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option>
54 <section name="adv" title="Advanced Options" expanded="false"> 70 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option>
55 <param name="hairpin_constraints" type="text" area="true" label="Hairpins Constraints" optional="true" help="e.g. (you can add others Hairpins Constraints on a new line): stem_size=20, hairpin_window=200"/> 71 </param>
56 <param name="gc_constraints" type="text" area="true" label="Enforce GC Content Constraints" optional="true" help="e.g. (you can add others Enforce GC Content Constraints on a new line): mini=0.3, maxi=0.7, window=100"/> 72 <param name="avoid_patterns" type="text" area="true" label="Avoid Pattern Constraints" help="Each pattern on a line" />
57 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="" optional="true" help="e.g.: 15"/> 73 <param name="hairpin_constraints" type="text" area="true" label="Hairpins Constraints" optional="true" help="e.g. (you can add others Hairpins Constraints on a new line): stem_size=20, hairpin_window=200"/>
58 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" /> 74 <param name="gc_constraints" type="text" area="true" label="Enforce GC Content Constraints" optional="true" help="e.g. (you can add others Enforce GC Content Constraints on a new line): mini=0.3, maxi=0.7, window=100"/>
59 </section> 75 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="" optional="true" help="e.g.: 15"/>
76 </when>
77 <when value="true">
78 <param name="json_params" type="data" format="json" optional="true" label="JSON parameters file" help="Contains tool's parameters" />
79 </when>
80 </conditional>
81 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" />
60 </inputs> 82 </inputs>
61 <outputs> 83 <outputs>
62 <collection name="scul" type="list" label="scul group" > 84 <collection name="scul" type="list" label="scul group" >
63 <discover_datasets pattern="(?P&lt;name&gt;.*).zip" format="zip" directory="outdir_scul" /> 85 <discover_datasets pattern="(?P&lt;name&gt;.*).zip" format="zip" directory="outdir_scul" />
64 </collection> 86 </collection>
66 <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_unscul" /> 88 <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_unscul" />
67 </collection> 89 </collection>
68 </outputs> 90 </outputs>
69 <tests> 91 <tests>
70 <test> 92 <test>
71 <!-- test for DnaOptimizationProblem --> 93 <!-- test for DnaOptimizationProblem -->
72 <param name="genbank_files"> 94 <param name="genbank_files">
73 <collection type="list"> 95 <collection type="list">
74 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> 96 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
75 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> 97 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
76 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> 98 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
81 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> 103 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
82 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> 104 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
83 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> 105 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
84 </collection> 106 </collection>
85 </param> 107 </param>
86 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" /> 108 <conditional name="json_use">
87 <param name="adv|use_file_names_as_ids" value="True" /> 109 <param name="use_json_param" value="false" />
88 <!-- AvoidPatterns --> 110 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" />
89 <param name="avoid_patterns" value="BsaI_site 111 <!-- AvoidPatterns -->
90 NotI_site 112 <param name="avoid_patterns" value="BsaI_site
91 XbaI_site 113 NotI_site
92 ClaI_site 114 XbaI_site
93 8x1mer" /> 115 ClaI_site
94 <!-- EnforceGCContent --> 116 8x1mer" />
95 <param name="adv|gc_constraints" value="mini=0.1, maxi=0.9, window=50"/> 117 <!-- EnforceGCContent -->
118 <param name="gc_constraints" value="mini=0.1, maxi=0.9, window=50"/>
119 </conditional>
120 <param name="use_file_names_as_ids" value="True" />
96 <output_collection name="scul" count="10"> 121 <output_collection name="scul" count="10">
97 </output_collection> 122 </output_collection>
98 <output_collection name="unscul" count="10"> 123 <output_collection name="unscul" count="10">
99 </output_collection> 124 </output_collection>
100 </test> 125 </test>
101 <test> 126 <test>
102 <!-- test for CircularDnaOptimizationProblem --> 127 <!-- test for CircularDnaOptimizationProblem -->
103 <param name="genbank_files"> 128 <param name="genbank_files">
104 <collection type="list"> 129 <collection type="list">
105 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> 130 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
106 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> 131 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
107 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> 132 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
112 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> 137 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
113 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> 138 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
114 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> 139 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
115 </collection> 140 </collection>
116 </param> 141 </param>
117 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" /> 142 <conditional name="json_use">
118 <param name="adv|use_file_names_as_ids" value="True" /> 143 <param name="use_json_param" value="false" />
119 <!-- AvoidPatterns --> 144 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" />
120 <param name="avoid_patterns" value="BsaI_site 145 <!-- AvoidPatterns -->
121 NotI_site 146 <param name="avoid_patterns" value="BsaI_site
122 XbaI_site 147 NotI_site
123 ClaI_site 148 XbaI_site
124 8x1mer" /> 149 ClaI_site
125 <!-- EnforceGCContent --> 150 8x1mer" />
126 <param name="adv|gc_constraints" value="mini=0.1, maxi=0.9 151 <!-- EnforceGCContent -->
127 mini-0.3, maxi=0.7, window=50"/> 152 <param name="gc_constraints" value="mini=0.1, maxi=0.9
128 <param name="adv|hairpin_constraints" value="stem_size=20, hairpin_window=200 153 mini=0.3, maxi=0.7, window=50"/>
129 stem_size=10, hairpin_window=100"/> 154 <param name="hairpin_constraints" value="stem_size=20, hairpin_window=200
155 stem_size=10, hairpin_window=100"/>
156 </conditional>
157 <param name="use_file_names_as_ids" value="True" />
158 <output_collection name="scul" count="10">
159 </output_collection>
160 <output_collection name="unscul" count="10">
161 </output_collection>
162 </test>
163 <test>
164 <!-- test json params -->
165 <param name="genbank_files">
166 <collection type="list">
167 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
168 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
169 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
170 <element name="p15_p9_BSDRPuroR" value="10_emma_genbanks/p9_BSDR.gb" />
171 <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" />
172 <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" />
173 <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" />
174 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
175 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
176 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
177 </collection>
178 </param>
179 <conditional name="json_use">
180 <param name="use_json_param" value="true" />
181 <param name="json_params" value="test_json_workflow2.json" />
182 </conditional>
183 <param name="use_file_names_as_ids" value="True" />
130 <output_collection name="scul" count="10"> 184 <output_collection name="scul" count="10">
131 </output_collection> 185 </output_collection>
132 <output_collection name="unscul" count="10"> 186 <output_collection name="unscul" count="10">
133 </output_collection> 187 </output_collection>
134 </test> 188 </test>
140 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_) 194 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_)
141 195
142 **Parameters**: 196 **Parameters**:
143 --------------- 197 ---------------
144 * **GenBank File(s)**: List of GenBank files to be processed. 198 * **GenBank File(s)**: List of GenBank files to be processed.
199 * **Use parameter from a JSON file**:
200 Yes/No parameter indicating whether the user sets the parameters manually or provides them in a JSON file.
201 If Yes, the user should provide a JSON file containing all the parameters.
145 * **DnaOptimizationProblem Class**: 202 * **DnaOptimizationProblem Class**:
146 - "DnaOptimizationProblem": is the class used to define and solve optimization problems. Its methods implement all the solver logic. 203 - "DnaOptimizationProblem": is the class used to define and solve optimization problems. Its methods implement all the solver logic.
147 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular. 204 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular.
148 * **Avoid Pattern Constraints**: is a sequence design rule that can be used as a constraint. It defines pattern(s) to avoid during problem optimisation. 205 * **Avoid Pattern Constraints**: is a sequence design rule that can be used as a constraint. It defines pattern(s) to avoid during problem optimisation.
149 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer", which means "any 3-nucleotide sequence (3-mer) repeated 5 times in a row". 206 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer", which means "any 3-nucleotide sequence (3-mer) repeated 5 times in a row".