Mercurial > repos > tduigou > sculpt_sequences
comparison sculpt_sequences.xml @ 1:a0cd867780ec draft default tip
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6b6ce806b5d016b3c7f20318180eff2dbe64395a-dirty
| author | tduigou |
|---|---|
| date | Thu, 17 Jul 2025 13:24:49 +0000 |
| parents | 0c7f75a2338b |
| children |
comparison
equal
deleted
inserted
replaced
| 0:0c7f75a2338b | 1:a0cd867780ec |
|---|---|
| 16 <requirement type="package" version="0.3.9">pdf-reports</requirement> | 16 <requirement type="package" version="0.3.9">pdf-reports</requirement> |
| 17 <requirement type="package" version="0.1.8">sequenticon</requirement> | 17 <requirement type="package" version="0.1.8">sequenticon</requirement> |
| 18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement> | 18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement> |
| 19 </requirements> | 19 </requirements> |
| 20 <command detect_errors="exit_code"><![CDATA[ | 20 <command detect_errors="exit_code"><![CDATA[ |
| 21 #set avoid_list = [line.strip() for line in str($avoid_patterns).strip().split('\n') if line.strip()] | 21 #if str($json_use.use_json_param) == "false": |
| 22 #set avoid_patterns = ','.join($avoid_list) | 22 #set avoid_list = [line.strip() for line in str($avoid_patterns).strip().split('\n') if line.strip()] |
| 23 | 23 #set avoid_patterns = ','.join($avoid_list) |
| 24 #set hairpin_lines = [line.strip() for line in str($adv.hairpin_constraints).strip().split('\n') if line.strip()] | 24 |
| 25 #set hairpin_constraints = '__cn__'.join($hairpin_lines) | 25 #set hairpin_lines = [line.strip() for line in str($json_use.hairpin_constraints).strip().split('\n') if line.strip()] |
| 26 | 26 #set hairpin_constraints = '__cn__'.join($hairpin_lines) |
| 27 #set gc_lines = [line.strip() for line in str($adv.gc_constraints).strip().split('\n') if line.strip()] | 27 |
| 28 #set gc_constraints = '__cn__'.join($gc_lines) | 28 #set gc_lines = [line.strip() for line in str($json_use.gc_constraints).strip().split('\n') if line.strip()] |
| 29 #set gc_constraints = '__cn__'.join($gc_lines) | |
| 30 | |
| 31 #set kmer_size = $json_use.kmer_size | |
| 32 | |
| 33 #else: | |
| 34 #set avoid_patterns = '' | |
| 35 #set hairpin_constraints = '' | |
| 36 #set gc_constraints = '' | |
| 37 #set kmer_size = '' | |
| 38 #end if | |
| 29 | 39 |
| 30 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files]) | 40 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files]) |
| 31 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files]) | 41 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files]) |
| 32 | 42 |
| 33 mkdir 'outdir_scul' && | 43 mkdir 'outdir_scul' && |
| 34 mkdir 'outdir_unscul' && | 44 mkdir 'outdir_unscul' && |
| 35 python '$__tool_directory__/sculpt_sequences.py' | 45 python '$__tool_directory__/sculpt_sequences.py' |
| 46 --use_json_param '$json_use.use_json_param' | |
| 36 --files_to_sculpt '$genbank_file_paths' | 47 --files_to_sculpt '$genbank_file_paths' |
| 37 --file_name_mapping '$file_name_mapping' | 48 --file_name_mapping '$file_name_mapping' |
| 38 --outdir_scul 'outdir_scul' | 49 --outdir_scul 'outdir_scul' |
| 39 --outdir_unscul 'outdir_unscul' | 50 --outdir_unscul 'outdir_unscul' |
| 40 --use_file_names_as_id '$adv.use_file_names_as_ids' | 51 --use_file_names_as_id '$use_file_names_as_ids' |
| 41 --avoid_patterns '$avoid_patterns' | 52 --avoid_patterns '$avoid_patterns' |
| 42 --gc_constraints '$adv.gc_constraints' | 53 --gc_constraints '$gc_constraints' |
| 43 --DnaOptimizationProblemClass '$DnaOptimizationProblemClass' | 54 --kmer_size '$kmer_size' |
| 44 --kmer_size '$adv.kmer_size' | 55 --hairpin_constraints '$hairpin_constraints' |
| 45 --hairpin_constraints '$adv.hairpin_constraints' | 56 #if $json_use.use_json_param: |
| 57 --json_params '$json_use.json_params' | |
| 58 #else: | |
| 59 --json_params '' | |
| 60 --DnaOptimizationProblemClass '$json_use.DnaOptimizationProblemClass' | |
| 61 #end if | |
| 46 ]]></command> | 62 ]]></command> |
| 47 <inputs> | 63 <inputs> |
| 48 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/> | 64 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/> |
| 49 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class"> | 65 <conditional name='json_use'> |
| 50 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option> | 66 <param name="use_json_param" type="boolean" checked="false" label="Use parameter from a JSON file" /> |
| 51 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option> | 67 <when value="false"> |
| 52 </param> | 68 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class"> |
| 53 <param name="avoid_patterns" type="text" area="true" label="Pattern to Avoid" helps="Each pattern on a line" /> | 69 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option> |
| 54 <section name="adv" title="Advanced Options" expanded="false"> | 70 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option> |
| 55 <param name="hairpin_constraints" type="text" area="true" label="Hairpins Constraints" optional="true" help="e.g. (you can add others Hairpins Constraints on a new line): stem_size=20, hairpin_window=200"/> | 71 </param> |
| 56 <param name="gc_constraints" type="text" area="true" label="Enforce GC Content Constraints" optional="true" help="e.g. (you can add others Enforce GC Content Constraints on a new line): mini=0.3, maxi=0.7, window=100"/> | 72 <param name="avoid_patterns" type="text" area="true" label="Avoid Pattern Constraints" help="Each pattern on a line" /> |
| 57 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="" optional="true" help="e.g.: 15"/> | 73 <param name="hairpin_constraints" type="text" area="true" label="Hairpins Constraints" optional="true" help="e.g. (you can add others Hairpins Constraints on a new line): stem_size=20, hairpin_window=200"/> |
| 58 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" /> | 74 <param name="gc_constraints" type="text" area="true" label="Enforce GC Content Constraints" optional="true" help="e.g. (you can add others Enforce GC Content Constraints on a new line): mini=0.3, maxi=0.7, window=100"/> |
| 59 </section> | 75 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="" optional="true" help="e.g.: 15"/> |
| 76 </when> | |
| 77 <when value="true"> | |
| 78 <param name="json_params" type="data" format="json" optional="true" label="JSON parameters file" help="Contains tool's parameters" /> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" /> | |
| 60 </inputs> | 82 </inputs> |
| 61 <outputs> | 83 <outputs> |
| 62 <collection name="scul" type="list" label="scul group" > | 84 <collection name="scul" type="list" label="scul group" > |
| 63 <discover_datasets pattern="(?P<name>.*).zip" format="zip" directory="outdir_scul" /> | 85 <discover_datasets pattern="(?P<name>.*).zip" format="zip" directory="outdir_scul" /> |
| 64 </collection> | 86 </collection> |
| 66 <discover_datasets pattern="(?P<name>.*).gb" format="genbank" directory="outdir_unscul" /> | 88 <discover_datasets pattern="(?P<name>.*).gb" format="genbank" directory="outdir_unscul" /> |
| 67 </collection> | 89 </collection> |
| 68 </outputs> | 90 </outputs> |
| 69 <tests> | 91 <tests> |
| 70 <test> | 92 <test> |
| 71 <!-- test for DnaOptimizationProblem --> | 93 <!-- test for DnaOptimizationProblem --> |
| 72 <param name="genbank_files"> | 94 <param name="genbank_files"> |
| 73 <collection type="list"> | 95 <collection type="list"> |
| 74 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> | 96 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> |
| 75 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> | 97 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> |
| 76 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> | 98 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> |
| 81 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> | 103 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> |
| 82 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> | 104 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> |
| 83 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> | 105 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> |
| 84 </collection> | 106 </collection> |
| 85 </param> | 107 </param> |
| 86 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" /> | 108 <conditional name="json_use"> |
| 87 <param name="adv|use_file_names_as_ids" value="True" /> | 109 <param name="use_json_param" value="false" /> |
| 88 <!-- AvoidPatterns --> | 110 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" /> |
| 89 <param name="avoid_patterns" value="BsaI_site | 111 <!-- AvoidPatterns --> |
| 90 NotI_site | 112 <param name="avoid_patterns" value="BsaI_site |
| 91 XbaI_site | 113 NotI_site |
| 92 ClaI_site | 114 XbaI_site |
| 93 8x1mer" /> | 115 ClaI_site |
| 94 <!-- EnforceGCContent --> | 116 8x1mer" /> |
| 95 <param name="adv|gc_constraints" value="mini=0.1, maxi=0.9, window=50"/> | 117 <!-- EnforceGCContent --> |
| 118 <param name="gc_constraints" value="mini=0.1, maxi=0.9, window=50"/> | |
| 119 </conditional> | |
| 120 <param name="use_file_names_as_ids" value="True" /> | |
| 96 <output_collection name="scul" count="10"> | 121 <output_collection name="scul" count="10"> |
| 97 </output_collection> | 122 </output_collection> |
| 98 <output_collection name="unscul" count="10"> | 123 <output_collection name="unscul" count="10"> |
| 99 </output_collection> | 124 </output_collection> |
| 100 </test> | 125 </test> |
| 101 <test> | 126 <test> |
| 102 <!-- test for CircularDnaOptimizationProblem --> | 127 <!-- test for CircularDnaOptimizationProblem --> |
| 103 <param name="genbank_files"> | 128 <param name="genbank_files"> |
| 104 <collection type="list"> | 129 <collection type="list"> |
| 105 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> | 130 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> |
| 106 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> | 131 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> |
| 107 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> | 132 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> |
| 112 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> | 137 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> |
| 113 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> | 138 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> |
| 114 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> | 139 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> |
| 115 </collection> | 140 </collection> |
| 116 </param> | 141 </param> |
| 117 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" /> | 142 <conditional name="json_use"> |
| 118 <param name="adv|use_file_names_as_ids" value="True" /> | 143 <param name="use_json_param" value="false" /> |
| 119 <!-- AvoidPatterns --> | 144 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" /> |
| 120 <param name="avoid_patterns" value="BsaI_site | 145 <!-- AvoidPatterns --> |
| 121 NotI_site | 146 <param name="avoid_patterns" value="BsaI_site |
| 122 XbaI_site | 147 NotI_site |
| 123 ClaI_site | 148 XbaI_site |
| 124 8x1mer" /> | 149 ClaI_site |
| 125 <!-- EnforceGCContent --> | 150 8x1mer" /> |
| 126 <param name="adv|gc_constraints" value="mini=0.1, maxi=0.9 | 151 <!-- EnforceGCContent --> |
| 127 mini-0.3, maxi=0.7, window=50"/> | 152 <param name="gc_constraints" value="mini=0.1, maxi=0.9 |
| 128 <param name="adv|hairpin_constraints" value="stem_size=20, hairpin_window=200 | 153 mini=0.3, maxi=0.7, window=50"/> |
| 129 stem_size=10, hairpin_window=100"/> | 154 <param name="hairpin_constraints" value="stem_size=20, hairpin_window=200 |
| 155 stem_size=10, hairpin_window=100"/> | |
| 156 </conditional> | |
| 157 <param name="use_file_names_as_ids" value="True" /> | |
| 158 <output_collection name="scul" count="10"> | |
| 159 </output_collection> | |
| 160 <output_collection name="unscul" count="10"> | |
| 161 </output_collection> | |
| 162 </test> | |
| 163 <test> | |
| 164 <!-- test json params --> | |
| 165 <param name="genbank_files"> | |
| 166 <collection type="list"> | |
| 167 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> | |
| 168 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> | |
| 169 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> | |
| 170 <element name="p15_p9_BSDRPuroR" value="10_emma_genbanks/p9_BSDR.gb" /> | |
| 171 <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> | |
| 172 <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> | |
| 173 <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> | |
| 174 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> | |
| 175 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> | |
| 176 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> | |
| 177 </collection> | |
| 178 </param> | |
| 179 <conditional name="json_use"> | |
| 180 <param name="use_json_param" value="true" /> | |
| 181 <param name="json_params" value="test_json_workflow2.json" /> | |
| 182 </conditional> | |
| 183 <param name="use_file_names_as_ids" value="True" /> | |
| 130 <output_collection name="scul" count="10"> | 184 <output_collection name="scul" count="10"> |
| 131 </output_collection> | 185 </output_collection> |
| 132 <output_collection name="unscul" count="10"> | 186 <output_collection name="unscul" count="10"> |
| 133 </output_collection> | 187 </output_collection> |
| 134 </test> | 188 </test> |
| 140 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_) | 194 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_) |
| 141 | 195 |
| 142 **Parameters**: | 196 **Parameters**: |
| 143 --------------- | 197 --------------- |
| 144 * **GenBank File(s)**: List of GenBank files to be processed. | 198 * **GenBank File(s)**: List of GenBank files to be processed. |
| 199 * **Use parameter from a JSON file**: | |
| 200 Yes/No parameter indicating whether the user wants to set the parameters manually or load them from a JSON file. | |
| 201 If Yes, the user should provide a JSON file containing all the parameters. | |
| 145 * **DnaOptimizationProblem Class**: | 202 * **DnaOptimizationProblem Class**: |
| 146 - "DnaOptimizationProblem": is the class used to define and solve optimization problems. Its methods implement all the solver logic. | 203 - "DnaOptimizationProblem": is the class used to define and solve optimization problems. Its methods implement all the solver logic. |
| 147 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular. | 204 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular. |
| 148 * **Avoid Pattern Constraints**: is a sequence design rule that can be used as a constraint. It defines pattern(s) to avoid during problem optimisation. | 205 * **Avoid Pattern Constraints**: is a sequence design rule that can be used as a constraint. It defines pattern(s) to avoid during problem optimisation. |
| 149 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer", which means "any 3-nucleotide sequence repeated 5 times in a row". | 206 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer", which means "any 3-nucleotide sequence repeated 5 times in a row". |
