changeset 1:787f1ca9045a draft default tip

Uploaded
author greg
date Wed, 13 Oct 2021 20:12:48 +0000
parents d69ebf52c233
children
files data_manager/malt_index_builder.py data_manager/malt_index_builder.xml data_manager_conf.xml test-data/malt_index_builder.json test-data/malt_index_builder1.json test-data/malt_index_builder2.json tool_data_table_conf.xml.sample
diffstat 7 files changed, 83 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/malt_index_builder.py	Tue Oct 12 14:15:35 2021 +0000
+++ b/data_manager/malt_index_builder.py	Wed Oct 13 20:12:48 2021 +0000
@@ -6,8 +6,6 @@
 import subprocess
 import sys
 
-DEFAULT_DATA_TABLE_NAME = "malt_indices"
-
 
 def get_id_name(params, dbkey, fasta_description=None):
     sequence_id = params['param_dict']['sequence_id']
@@ -22,29 +20,40 @@
     return sequence_id, sequence_name
 
 
-def build_malt_index(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, sequence_type, shapes, max_hits_per_seed, protein_reduct, data_table_name=DEFAULT_DATA_TABLE_NAME):
+def build_malt_index(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, sequence_type, shapes, max_hits_per_seed, protein_reduct):
+    # The malt-build program produces a directory of files,
+    # so the data table path entry will be a directory and
+    # not an index file.
     fasta_base_name = os.path.split(fasta_filename)[-1]
     sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
     os.symlink(fasta_filename, sym_linked_fasta_filename)
-    args = ['malt-build', '--input', sym_linked_fasta_filename, '--sequenceType', sequence_type, '--index', 'index']
+    args = ['malt-build', '--input', sym_linked_fasta_filename, '--sequenceType', sequence_type, '--index', target_directory]
     threads = os.environ.get('GALAXY_SLOTS')
     if threads:
         args.extend(['--threads', threads])
     if shapes is not None:
-        args.extend('--shapes', shapes)
+        args.extend(['--shapes', shapes])
     if max_hits_per_seed is not None:
-        args.extend('--maxHitsPerSeed', max_hits_per_seed)
+        args.extend(['--maxHitsPerSeed', max_hits_per_seed])
     if protein_reduct is not None:
-        args.extend('--proteinReduct', protein_reduct)
+        args.extend(['--proteinReduct', protein_reduct])
     proc = subprocess.Popen(args=args, shell=False, cwd=target_directory)
     return_code = proc.wait()
     if return_code:
         sys.exit('Error building index, return_code: %d' % return_code)
-    data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name)
-    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+    # Remove unwanted files from the output directory.
+    os.remove(sym_linked_fasta_filename)
+    # The path entry here is the directory
+    # where the index files will be located,
+    # not a single index file (malt-build
+    # produces a directory of files, which
+    # is considered an index).
+    data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=None)
+    _add_data_table_entry(data_manager_dict, data_table_entry)
 
 
-def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+def _add_data_table_entry(data_manager_dict, data_table_entry):
+    data_table_name = "malt_indices"
     data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
     data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
     data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
@@ -58,7 +67,7 @@
     parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta description')
     parser.add_option('-e', '--sequence_type', dest='sequence_type', action='store', type="string", help='DNA or Protein sequences')
     parser.add_option('-p', '--shapes', dest='shapes', action='store', type="string", default=None, help='Comma-separated list of seed shapes')
-    parser.add_option('-m', '--max_hits_per_seed', dest='max_hits_per_seed', action='store', type="int", default=None, help='Maximum number of hits per seed')
+    parser.add_option('-m', '--max_hits_per_seed', dest='max_hits_per_seed', action='store', type="string", default=None, help='Maximum number of hits per seed')
     parser.add_option('-r', '--protein_reduct', dest='protein_reduct', action='store', type="string", default=None, help='Name or definition of protein alphabet reduction')
     (options, args) = parser.parse_args()
 
@@ -78,7 +87,7 @@
     sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description)
 
     # Build the index.
-    build_malt_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, options.sequence_type, options.shapes, options.max_hits_per_seed, options.protein_reduct, data_table_name=DEFAULT_DATA_TABLE_NAME)
+    build_malt_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, options.sequence_type, options.shapes, options.max_hits_per_seed, options.protein_reduct)
 
     # Save info to json file.
     with open(filename, 'w') as fh:
--- a/data_manager/malt_index_builder.xml	Tue Oct 12 14:15:35 2021 +0000
+++ b/data_manager/malt_index_builder.xml	Wed Oct 13 20:12:48 2021 +0000
@@ -10,40 +10,32 @@
 --fasta_dbkey '${all_fasta_source.fields.dbkey}'
 --fasta_description '${all_fasta_source.fields.name}'
 --sequence_type '${sequence_type}'
+#if str($protein_reduct_setting_cond.protein_reduct_setting) == 'yes':
+    --protein_reduct '${protein_reduct_setting_cond.protein_reduct}'
+#end if
 #if str($seed_setting_cond.seed_setting) == 'yes':
     --shapes '${seed_setting_cond.shapes}'
-    --max_hits_per_seed $seed_setting_cond.max_hits_per_seed
-    --protein_reduct '${seed_setting_cond.protein_reduct}'
+    ## malt-build requires a string here.
+    --max_hits_per_seed '${seed_setting_cond.max_hits_per_seed}'
 #end if
     ]]></command>
     <inputs>
         <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
             <options from_data_table="all_fasta"/>
         </param>
-        <param name="sequence_name" type="text" value="" label="Name of sequence" />
-        <param name="sequence_id" type="text" value="" label="ID for sequence" />
-        <param  name="sequence_type" type="select" label="Specify whether the reference sequences are DNA or Protein sequences" help="Use the DNA setting For RNA sequences">
+        <param name="sequence_name" type="text" value="" label="Name of sequence"/>
+        <param name="sequence_id" type="text" value="" label="ID for sequence"/>
+        <param  name="sequence_type" type="select" label="Reference sequences type" help="Use the DNA setting for RNA sequences">
             <option value="DNA" selected="true">DNA</option>
             <option value="Protein">Protein</option>
         </param>
-        <conditional name="seed_setting_cond">
-            <param name="seed_setting" type="select" label="Specify seed settings?">
+        <conditional name="protein_reduct_setting_cond">
+            <param name="protein_reduct_setting" type="select" label="Specify protein alphabet reduction?" help="Used only if the reference sequences are Protein sequences">
                 <option selected="true" value="no">No</option>
                 <option value="yes">Yes</option>
             </param>
             <when value="no"/>
             <when value="yes">
-                <param name="shapes" type="text" value="" label="Comma-separated list of seed shapes" help="See help text below">
-                    <sanitizer invalid_char="">
-                        <valid initial="string.printable">
-                            <remove value="&apos;" />
-                        </valid>
-                        <mapping initial="none">
-                            <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;" />
-                        </mapping>
-                    </sanitizer>
-                </param>
-                <param name="max_hits_per_seed" type="integer" value="1" min="1" label="Maximum number of hits per seed"/>
                 <param name="protein_reduct" type="select" label="Name or definition of protein alphabet reduction">
                     <option selected="true" value="DIAMOND_11">DIAMOND_11</option>
                     <option value="BLOSUM50_10">BLOSUM50_10</option>
@@ -59,20 +51,59 @@
                 </param>
             </when>
         </conditional>
+        <conditional name="seed_setting_cond">
+            <param name="seed_setting" type="select" label="Specify seed settings?">
+                <option selected="true" value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="shapes" type="text" value="" label="Comma-separated list of seed shapes" help="See help text below">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+                <param name="max_hits_per_seed" type="integer" value="1" min="1" label="Maximum number of hits per seed"/>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="out_file" format="data_manager_json" />
+        <data name="out_file" format="data_manager_json"/>
     </outputs>
     <tests>
         <test>
             <param name="all_fasta_source" value="phiX174"/>
-            <output name="out_file" value="malt_index_builder.json"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-1"/>
+            <output name="out_file" value="malt_index_builder1.json"/>
+        </test>
+        <test>
+            <param name="all_fasta_source" value="phiX174"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-1"/>
+            <param name="seed_setting" value="yes"/>
+            <param name="shapes" value="111110111011110110111111"/>
+            <output name="out_file" value="malt_index_builder1.json"/>
+        </test>
+        <test>
+            <param name="all_fasta_source" value="phiX174"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-2"/>
+            <param name="sequence_type" value="Protein"/>
+            <param name="protein_reduct_setting" value="yes"/>
+            <param name="protein_reduct" value="BLOSUM50_10"/>
+            <output name="out_file" value="malt_index_builder2.json"/>
         </test>
     </tests>
     <help>
 .. class:: infomark
 
-**Notice:** Values for name, description, and id will be generated automatically if left blank.
+**Notice:** Values for Name and ID of sequence will be generated automatically if left blank.
 
 **What it does**
 
--- a/data_manager_conf.xml	Tue Oct 12 14:15:35 2021 +0000
+++ b/data_manager_conf.xml	Wed Oct 13 20:12:48 2021 +0000
@@ -1,16 +1,15 @@
 <data_managers>
-    <data_manager tool_file="data_manager/malt_index_builder.xml" id="malt_index_builder" version="0.5.3">
+    <data_manager tool_file="data_manager/malt_index_builder.xml" id="data_manager_malt_index_builder">
         <data_table name="malt_indices">
             <output>
-                <column name="value" />
-                <column name="dbkey" />
-                <column name="name" />
-                <column name="path" output_ref="out_file" >
+                <column name="value"/>
+                <column name="dbkey"/>
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
                     <move type="directory" relativize_symlinks="True">
-                        <!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/malt_index/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">malt_index/${value}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/malt_index/${value}/${path}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/malt_index/${value}</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
--- a/test-data/malt_index_builder.json	Tue Oct 12 14:15:35 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"malt_indices": [{"dbkey": "phiX174", "name": "phiX174", "path": "phiX174.fasta", "value": "phiX174"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malt_index_builder1.json	Wed Oct 13 20:12:48 2021 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"malt_indices": [{"dbkey": "phiX174", "name": "Pretty name for phiX174", "path": null, "value": "phiX174-1"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malt_index_builder2.json	Wed Oct 13 20:12:48 2021 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"malt_indices": [{"dbkey": "phiX174", "name": "Pretty name for phiX174", "path": null, "value": "phiX174-2"}]}}
\ No newline at end of file
--- a/tool_data_table_conf.xml.sample	Tue Oct 12 14:15:35 2021 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Oct 13 20:12:48 2021 +0000
@@ -1,7 +1,7 @@
 <tables>
     <!-- Locations of indices MALT versions 0.5.3 and higher -->
     <table name="malt_indices" comment_char="#">
-        <columns>value, name, path, description</columns>
+        <columns>value, dbkey, name, path</columns>
         <file path="tool-data/malt_indices.loc" />
     </table>
 </tables>