Mercurial > repos > recetox > spec2vec_similarity
diff spec2vec_similarity.xml @ 0:881742fff7ff draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit f79a5b51599254817727bc9028b9797ea994cb4e
author | recetox |
---|---|
date | Tue, 27 Jun 2023 14:23:58 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spec2vec_similarity.xml Tue Jun 27 14:23:58 2023 +0000 @@ -0,0 +1,84 @@ +<tool id="spec2vec_similarity" name="spec2vec similarity" version="@TOOL_VERSION@+galaxy0" profile="21.09"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="creator"/> + <expand macro="edam" /> + + <requirements> + <requirement type="package" version="@TOOL_VERSION@">spec2vec</requirement> + </requirements> + + <command detect_errors="aggressive"><![CDATA[ + python3 ${python_wrapper} + ]]></command> +<configfiles> +<configfile name="python_wrapper"> +@init_logger@ +@init_model@ + +import numpy as np +from spec2vec import Spec2Vec + +similarity = Spec2Vec( + model, + intensity_weighting_power=${intensity_power}, + allowed_missing_percentage=${allow_missing_percentage} * 100 +) +name="Spec2Vec_${intensity_power}_${allow_missing_percentage}" + +@init_scores@ + +from matchms.filtering import normalize_intensities + +layer = similarity.sparse_array( + references=np.asarray(list(map(normalize_intensities, scores.references))), + queries=np.asarray(list(map(normalize_intensities, scores.queries))), + idx_row = scores._scores.row, + idx_col = scores._scores.col, + is_symmetric=False) + +scores._scores.add_sparse_data(scores._scores.row, scores._scores.col, layer, name) + +scores.filter_by_range(inplace=True, name=name, low=0) +scores.to_json("$similarity_scores") +</configfile> +</configfiles> + + <inputs> + <expand macro="input_param" /> + <param label="Model JSON file" name="model_metadata" type="data" format="json" + help="Model JSON file to use for Spec2Vec similarity computing."/> + <param label="Model NPY file" name="model_weights" type="data" format="binary" + help="Model NPY file to use for Spec2Vec similarity computing."/> + <param label="intensity_power" name="intensity_power" type="float" value="0.0" + help="Spectrum vectors are a weighted sum of the word vectors. The given word intensities will be raised to the given power. + The default is 0, which means that no weighing will be done."/> + <param label="Maximum share of new peaks" name="allow_missing_percentage" type="float" value="0.1" max="1.0" min="0.0" + help="Maximum allowed share of the peaks that are new to the model in relation to the whole peak corpus."/> + </inputs> + <outputs> + <data label="Spec2Vec scores of ${on_string}" name="similarity_scores" format="json"/> + </outputs> + + <tests> + <test> <!-- TEST #1: Test Spec2Vec. --> + <param name="references" value="inp_filtered_library.msp" ftype="msp"/> + <param name="queries" value="inp_filtered_spectra.msp" ftype="msp"/> + <param name="model_metadata" value="model_100.json" ftype="json"/> + <param name="model_weights" value="weights_100.binary" ftype="auto"/> + <param name="allow_missing_percentage" value="1.0"/> + <output name="similarity_scores" file="s2v_scores_test1_out.json" ftype="json"/> + </test> + <test> + <param name="use_scores" value="True"/> + <param name="scores_in" value="ri_match_60.json" ftype="json"/> + <param name="model_metadata" value="model_100.json" ftype="json"/> + <param name="model_weights" value="weights_100.binary" ftype="auto"/> + <param name="allow_missing_percentage" value="1.0"/> + <output name="similarity_scores" value="s2v_scores_test2_out.json" ftype="json" /> + </test> + </tests> + + <expand macro="citations"/> +</tool> \ No newline at end of file