Mercurial > repos > bgruening > simsearch
annotate simsearch.xml @ 14:47d2bf691908
ChemicalToolBoX update.
| author | Bjoern Gruening <bjoern.gruening@gmail.com> |
|---|---|
| date | Sun, 02 Jun 2013 19:54:51 +0200 |
| parents | dbe9c82028d1 |
| children | b4eaf75cac77 |
| rev | line source |
|---|---|
|
13
dbe9c82028d1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
12
diff
changeset
|
1 <tool id="ctb_simsearch" name="Similarity Search" version="0.1.1"> |
| 0 | 2 <description>of fingerprint data sets</description> |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.1p1">chemfp</requirement> | |
| 7 | 5 <requirement type="package" version="2.3.2">openbabel</requirement> |
| 0 | 6 </requirements> |
| 7 <command> | |
| 8 #if $method_opts.method_opts_selector == "chemfp": | |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
9 simsearch |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
10 #if int($method_opts.knn) == 0: |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
11 #set $k = 'all' |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
12 ## count is only available if k nearest neighbor search is disabled |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
13 $method_opts.counts |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
14 #else: |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
15 #set $k = int($method_opts.knn) |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
16 #end if |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
17 |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
18 -k $k |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
19 |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
20 --threshold $method_opts.threshold |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
21 --query-format fps |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
22 --target-format fps |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
23 -o "${outfile}" |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
24 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
25 ## build and search an in-memory data structure (faster for multiple queries) |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
26 --memory |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
27 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
28 #if $method_opts.query_opts.query_opts_selector == "normal": |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
29 -q "${method_opts.query_opts.query}" |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
30 #else: |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
31 --NxN |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
32 #end if |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
33 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
34 "${method_opts.query_opts.targets}" |
| 0 | 35 #else: |
| 36 ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that. | |
| 37 ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query file type. | |
| 38 ## A workaround is to create a symlink with a proper file-extension. | |
| 39 #import tempfile | |
| 40 #set $temp_file = tempfile.NamedTemporaryFile() | |
| 41 #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext) | |
| 42 $temp_file.close() | |
| 43 ln -s $method_opts.query $temp_link; | |
| 44 obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&1; | |
| 45 rm $temp_link | |
| 46 #end if | |
| 47 </command> | |
| 48 <inputs> | |
| 49 | |
| 50 <conditional name="method_opts"> | |
| 51 <param name="method_opts_selector" type="select" label="Subject database/sequences"> | |
| 52 <option value="chemfp">Chemfp fingerprint file</option> | |
| 53 <option value="obabel">OpenBabel Fastsearch Index</option> | |
| 54 </param> | |
| 55 <when value="chemfp"> | |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
56 <conditional name="query_opts"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
57 <param name="query_opts_selector" type="select" label="Query Mode"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
58 <option value="normal">Query molecules are stored in a separate file</option> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
59 <option value="nxn">Target molecules are also queries (NxN)</option> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
60 </param> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
61 <when value="normal"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
62 <param name='query' type='data' format="fps" label='Query molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
63 <param name='targets' type='data' format="fps" label='Target molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
64 </when> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
65 <when value="nxn"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
66 <param name='targets' type='data' format="fps" label='Target molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
67 </when> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
68 </conditional> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
69 <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
70 <validator type="in_range" min="0" /> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
71 </param> |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
72 <param name='threshold' type='float' value='0.7' label='threshold' /> |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
73 <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" /> |
| 0 | 74 </when> |
| 75 <when value="obabel"> | |
| 76 <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/> | |
| 77 <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/> | |
| 78 <param name="threshold" type='float' label="threshold" value='0.7'/> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 | |
| 82 </inputs> | |
| 83 <outputs> | |
| 84 <data name="outfile" format="tabular" /> | |
| 85 </outputs> | |
| 86 <tests> | |
| 87 <test> | |
| 88 <param name="targets" ftype="fps" value="targets.fps"/> | |
| 89 <param name="query" ftype="fps" value="q.fps"/> | |
| 90 <param name="k" value='4'/> | |
| 91 <param name="th" value='0.7'/> | |
| 92 <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/> | |
| 93 </test> | |
| 94 </tests> | |
| 95 <help> | |
| 96 | |
| 97 | |
| 98 **What it does** | |
| 99 | |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
100 Similarity search of compounds using fingerprint data sets. Depending on the input either chemfp_ or `Open Babel`_ is used. |
| 0 | 101 |
| 102 | |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
103 **References** |
| 0 | 104 |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
105 Please reference the `Open Babel`_ or the chemfp_ project. |
| 0 | 106 |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
107 N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison. "Open Babel: An open chemical toolbox." J. Cheminf. (2011), 3, 33. `DOI:10.1186/1758-2946-3-33`_ |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
108 The Open Babel Package http://openbabel.sourceforge.net/ |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
109 |
| 0 | 110 |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
111 .. _DOI:10.1186/1758-2946-3-33: http://www.jcheminf.com/content/3/1/33 |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
112 .. _chemfp: http://chemfp.com/ |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
113 .. _`Open Babel`: http://openbabel.org/ |
| 0 | 114 |
| 115 </help> | |
| 116 </tool> |
