diff cherri_train.xml @ 0:f8733988a102 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cherri commit f9348123725f421ddbdbd8d372d038da4880dbac
author iuc
date Fri, 09 Dec 2022 17:40:06 +0000
parents
children 6c6924324783
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cherri_train.xml	Fri Dec 09 17:40:06 2022 +0000
@@ -0,0 +1,231 @@
+<tool id="cherri_train" name="Train a CheRRI model using RRIs" version="@VERSION@" profile="@PROFILE@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        export PYTHONHASHSEED=31337 &&
+        mkdir mixed_model &&
+        #set experiments = []
+        #for $experiment in $rep_experiment:
+            mkdir $experiment.exp_name &&
+            ln -s '$experiment.ref_source.genome_fasta' '$experiment.exp_name/genome.fa' &&
+            $experiments.append(str($experiment.exp_name))
+            #set replicates = []
+            #for $i, $sample in enumerate($experiment.rep_samples):
+                ln -s '$sample.file' '$experiment.exp_name/${i}.tabular' &&
+                $replicates.append(str($i) + ".tabular")
+            #end for
+            cherri train
+            -i1 '$experiment.exp_name'
+            -r #echo ' '.join($replicates)
+            -g '$experiment.exp_name/genome.fa'
+            -l '$experiment.chrom_len_file'
+            -n '$experiment.exp_name'
+            #if $experiment.occupied_regions:
+                -i2 '$experiment.occupied_regions'
+            #end if
+            -o .
+            -on '$experiment.exp_name' 
+            @COMMONPARAMS@ &&
+            #if len($rep_experiment) > 1:
+                ln -s '../$experiment.exp_name' 'mixed_model/$experiment.exp_name' &&
+            #else:
+                ln -s $experiment.exp_name/model/optimized/${experiment.exp_name}_context_${context}.model final_full.model &&
+                #if $use_structure == 'off':
+                    ln -s $experiment.exp_name/model/features/${experiment.exp_name}_context_${context}.npz features.npz &&
+                #else:
+                    ln -s $experiment.exp_name/feature_files/training_data_${experiment.exp_name}_context_${context}.npz features.npz &&
+                #end if
+            #end if
+        #end for
+        #if len($rep_experiment) > 1:
+            cherri train 
+                -mi on
+                -i1 mixed_model
+                -r #echo ' '.join($experiments)
+                -g /not/needed/
+                -l /not/needed/
+                -n mixed
+                -o .
+                -on mixed_model
+                @COMMONPARAMS@ &&
+                ln -s mixed_model/mixed/model/optimized/full_mixed_context_${context}.model final_full.model &&
+                #if $use_structure == 'off':
+                    ln -s mixed_model/mixed/model/features/mixed_context_${context}.npz features.npz &&
+                #else:
+                    ln -s mixed_model/mixed/feature_files/training_data_mixed_context_${context}.npz features.npz &&
+                #end if
+        #end if
+        tar -zhcvf model.tgz final_full.model features.npz
+    ]]></command>
+    <inputs>
+        <repeat name="rep_experiment" title="Experiment" min="1" default="1">
+            <param name="exp_name" type="text" value="myExperiment" label="Name of the experiment" help="Only letters, numbers and underscores will be retained in this field. If more than one experiment is provided, then the tool generates a mixed model by combining datasets.">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <expand macro="reference_source_conditional"/>
+            <repeat name="rep_samples" title="Interaction summary file" min="1" default="1">
+                <param name="file" type="data" format="tabular" multiple="false" label="Interaction summary file of a replicate" />
+            </repeat>
+            <param name="occupied_regions" optional="True" type="data" format="bed" label="Path to the genomic RBP crosslink or binding site locations (in BED format)" />
+        </repeat>
+        <param name="context" type="integer" value="150" label="How much context should be added at up- and downstream of each sequence" />
+        <param name="intarna_param_file" optional="True" type="data" format="txt" label="IntaRNA parameters file" />
+        <param name="use_structure" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Set 'off' if you want to disable structure, default 'on'" />
+        <param name="run_time" type="integer" value="43200" label="Time used for the optimization in seconds, default: 43200 (12h)" />
+        <param name="filter_hybrid" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Filter the data for hybrids already detected by ChiRA" />
+    </inputs>
+    <outputs>
+        <data name="out_model" format="tgz" from_work_dir="model.tgz" label="Trained model and features file on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- Single experiment -st on -->
+        <test>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment1"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_1.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+            </repeat>
+            <param name="run_time" value="60" />
+            <output name="out_model">
+                <assert_contents>
+                    <has_size min="100000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Single experiment -st off -->
+        <test>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment1"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_1.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+            </repeat>
+            <param name="run_time" value="60" />
+            <param name="use_structure" value="off" />
+            <output name="out_model">
+                <assert_contents>
+                    <has_size min="100000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Mixed model -st on -->
+        <test>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment1"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_1.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+            </repeat>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment2"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_2.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+            </repeat>
+            <param name="run_time" value="60" />
+            <param name="context" value="100" />
+            <output name="out_model">
+                <assert_contents>
+                    <has_size min="100000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Mixed model -st off -->
+        <test>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment1"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_1.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_1_pos.tabular" />
+                </repeat>
+            </repeat>
+            <repeat name="rep_experiment">
+                <param name="exp_name" value="myExperiment2"/>
+                <conditional name="ref_source">
+                    <param name="ref_source_selector" value="history"/>
+                    <param name="genome_fasta" value="train_2.fa"/>
+                </conditional>
+                <param name="chrom_len_file" value="train_2_len.tabular" />
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+                <repeat name="rep_samples">
+                    <param name="file" value="train_2_pos.tabular" />
+                </repeat>
+            </repeat>
+            <param name="run_time" value="60" />
+            <param name="use_structure" value="off" />
+            <output name="out_model">
+                <assert_contents>
+                    <has_size min="100000"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        CheRRI in train mode to build a predictive model. Takes interactions summary files from ChiRA tool as input. Generates a model file and a features file that can be used in CheRRI eval mode.
+    ]]></help>
+    <expand macro="citations" />
+</tool>