diff domestication.xml @ 4:f78651af72e4 draft

planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
author tduigou
date Mon, 19 May 2025 13:01:50 +0000
parents f730b81671fb
children cc02a5978551
line wrap: on
line diff
--- a/domestication.xml	Fri May 16 09:30:35 2025 +0000
+++ b/domestication.xml	Mon May 19 13:01:50 2025 +0000
@@ -18,7 +18,7 @@
     <command detect_errors="exit_code"><![CDATA[
         #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
         #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files])
-        mkdir 'outdir_gb' && mkdir 'outdir_zip' &&
+        mkdir 'metoutdir_gb' && mkdir 'outdir_zip' && mkdir 'outdir_gb'&&
         python '$__tool_directory__/domestication.py'
             --files_to_domestication '$genbank_file_paths'
             --csv_file '$domestication_csv'
@@ -26,14 +26,15 @@
             --use_file_names_as_id '$adv.use_file_names_as_ids'
             --allow_edits '$adv.allow_edits'
             --output_dom 'output_zip.zip'
-            --output_methprot 'outdir_gb'
+            --output_gb_dom 'outdir_gb'
+            --output_methprot 'metoutdir_gb'
             --methylation_protection '$methylation_protection' &&
         cp 'output_zip.zip' '$output_zip'
     ]]></command>
     <inputs>
         <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/>
         <param name="domestication_csv" type="data" format="csv" label="deomestication csv"/>
-        <param name="methylation_protection" type="boolean" checked="False" label="Enabel Methylation Protection"/>
+        <param name="methylation_protection" type="boolean" checked="True" label="Enabel Methylation Protection"/>
         <section name="adv" title="Advanced Options" expanded="false">
             <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use file names as sequence IDs?" />
             <param name="allow_edits" type="boolean" checked="True" label="Allow sequence edits" help="If False, sequences cannot be edited by the domesticator, only extended
@@ -44,14 +45,18 @@
     </inputs>   
     <outputs>
         <data format="zip" name="output_zip" label="domestication results"/>
+        <collection name="domesticated_gb" type="list" label="Domesticated GenBank Files">
+            <filter>methylation_protection == False</filter>
+            <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_gb" />
+        </collection>
         <collection name="methprot_gb" type="list" label="GenBank Files methylation protected">
             <filter>methylation_protection == True</filter>
-            <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_gb" />
+            <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="metoutdir_gb" />
         </collection>
     </outputs>
     <tests>
         <!--test without methylation protection-->
-        <test expect_num_outputs="1"> 
+        <test expect_num_outputs="2"> 
             <param name="genbank_files">
                 <collection type="list">
                     <element name="p3_2_CAG" value="EMMA_undomesticated_parts/p3_2_CAG.gb" />
@@ -75,6 +80,43 @@
                     </has_archive_member>
                 </assert_contents>
             </output>
+            <output_collection name="domesticated_gb" type="list" count="7">
+                <element name="p14_CMVp">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                <element name="p16_bGHpolyA">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                <element name="p18_CMVp">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                 <element name="p19_mNeoGreen">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                <element name="p22_PGKpolyA">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                <element name="p3_2_CAG">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+                <element name="p7_gfp_sequence">
+                    <assert_contents>
+                     <has_n_lines min="20" />
+                    </assert_contents>
+                </element>
+            </output_collection>
         </test>
         <!--test with methylation protection-->
         <test expect_num_outputs="2"> 
@@ -144,8 +186,17 @@
 Domestication
 =================
 
-The Domestication Tool in the EGF Biofoundry, powered by Genedom, enables rapid and accurate preparation of DNA parts for modular cloning systems, such as Golden Gate Assembly (complete documentation `here <https://edinburgh-genome-foundry.github.io/genedom/>_`)
-This tool streamlines the domestication process by identifying and removing internal restriction site like "BsaI_site", "NotI_site", "XbaI_site"... or costumised like "5x3mer" means "any 5 consecutive 3-nucleotide sequences — typically 5 unique 3-mers in a row.
+The Domestication Tool in the EGF Biofoundry, powered by Genedom, enables rapid and accurate preparation of DNA parts for modular cloning systems, such as Golden Gate Assembly (complete documentation `here <https://edinburgh-genome-foundry.github.io/genedom/>`_).
+This tool streamlines the domestication process by identifying and removing internal restriction sites such as "BsaI_site", "NotI_site", "XbaI_site"... (`enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_), or customised patterns such as "5x3mer", which means "any 3-nucleotide sequence repeated 5 times in a row".
+
+**Parameters**:
+---------------
+* **GenBank File(s)**: GenBank files for domestication (can be output of sculpt tool)
+* **deomestication csv**: CSV file containing the construct names with their parts (`example <https://cuba.genomefoundry.org/static/file_examples/domesticate_part_batches/EMMA.csv>`_).
+* **Enabel Methylation Protection**: Enable the methylation protection.
+* **Allow sequence edits**: Recommended. If False, sequences cannot be edited by the domesticator, only extended with flanks. If a sequence contains, for instance, forbidden restriction sites, the domestication will fail for this sequence (this will be noted in the report).
+* **Use file names as sequence IDs?**: Recommended if the GenBank file names represent the fragment names.
+* **output**: Zip report of the domestication process, plus the domesticated GenBank files (methylation-protected when the methylation protection option is enabled).
     ]]></help>
     <citations>
         <citation type="bibtex">