Mercurial > repos > bgruening > simsearch
annotate simsearch.xml @ 15:b4eaf75cac77
ChemicalToolBoX update.
| author | Bjoern Gruening <bjoern.gruening@gmail.com> |
|---|---|
| date | Fri, 19 Jul 2013 16:28:47 +0200 |
| parents | 47d2bf691908 |
| children | 987b62da416d |
| rev | line source |
|---|---|
|
13
dbe9c82028d1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
12
diff
changeset
|
1 <tool id="ctb_simsearch" name="Similarity Search" version="0.1.1"> |
| 0 | 2 <description>of fingerprint data sets</description> |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.1p1">chemfp</requirement> | |
| 7 | 5 <requirement type="package" version="2.3.2">openbabel</requirement> |
| 0 | 6 </requirements> |
| 7 <command> | |
| 8 #if $method_opts.method_opts_selector == "chemfp": | |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
9 simsearch |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
10 #if int($method_opts.knn) == 0: |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
11 #set $k = 'all' |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
12 ## count is only available if k nearest neighbor search is disabled |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
13 $method_opts.counts |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
14 #else: |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
15 #set $k = int($method_opts.knn) |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
16 #end if |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
17 |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
18 -k $k |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
19 |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
20 --threshold $method_opts.threshold |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
21 --query-format fps |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
22 --target-format fps |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
23 -o "${outfile}" |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
24 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
25 ## build and search an in-memory data structure (faster for multiple queries) |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
26 --memory |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
27 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
28 #if $method_opts.query_opts.query_opts_selector == "normal": |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
29 -q "${method_opts.query_opts.query}" |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
30 #else: |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
31 --NxN |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
32 #end if |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
33 |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
34 "${method_opts.query_opts.targets}" |
| 0 | 35 #else: |
| 36 ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that. | |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
37 ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype. |
| 0 | 38 ## A workaround is to create a symlink with a proper file-extension. |
| 39 #import tempfile | |
| 40 #set $temp_file = tempfile.NamedTemporaryFile() | |
| 41 #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext) | |
| 42 $temp_file.close() | |
| 43 ln -s $method_opts.query $temp_link; | |
| 44 obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&1; | |
| 45 rm $temp_link | |
| 46 #end if | |
| 47 </command> | |
| 48 <inputs> | |
| 49 | |
| 50 <conditional name="method_opts"> | |
| 51 <param name="method_opts_selector" type="select" label="Subject database/sequences"> | |
| 52 <option value="chemfp">Chemfp fingerprint file</option> | |
| 53 <option value="obabel">OpenBabel Fastsearch Index</option> | |
| 54 </param> | |
| 55 <when value="chemfp"> | |
|
12
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
56 <conditional name="query_opts"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
57 <param name="query_opts_selector" type="select" label="Query Mode"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
58 <option value="normal">Query molecules are stored in a separate file</option> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
59 <option value="nxn">Target molecules are also queries (NxN)</option> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
60 </param> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
61 <when value="normal"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
62 <param name='query' type='data' format="fps" label='Query molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
63 <param name='targets' type='data' format="fps" label='Target molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
64 </when> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
65 <when value="nxn"> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
66 <param name='targets' type='data' format="fps" label='Target molecules'/> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
67 </when> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
68 </conditional> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
69 <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
70 <validator type="in_range" min="0" /> |
|
526f6e88fbe7
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
7
diff
changeset
|
71 </param> |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
72 <param name='threshold' type='float' value='0.7' label='threshold' /> |
|
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
73 <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" /> |
| 0 | 74 </when> |
| 75 <when value="obabel"> | |
| 76 <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/> | |
| 77 <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/> | |
| 78 <param name="threshold" type='float' label="threshold" value='0.7'/> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 | |
| 82 </inputs> | |
| 83 <outputs> | |
| 84 <data name="outfile" format="tabular" /> | |
| 85 </outputs> | |
| 86 <tests> | |
| 87 <test> | |
| 88 <param name="targets" ftype="fps" value="targets.fps"/> | |
| 89 <param name="query" ftype="fps" value="q.fps"/> | |
| 90 <param name="k" value='4'/> | |
| 91 <param name="th" value='0.7'/> | |
| 92 <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/> | |
| 93 </test> | |
| 94 </tests> | |
| 95 <help> | |
| 96 | |
| 97 | |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
98 .. class:: infomark |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
99 |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
100 **What this tool does** |
| 0 | 101 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
102 Performs similarity searches with a variety of different fingerprints, using either the chemfp_ FPS type or the Open Babel FastSearch_ index. |
| 0 | 103 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
104 .. _chemfp: http://chemfp.com/ |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
105 .. _FastSearch: http://openbabel.org/wiki/FastSearch |
| 0 | 106 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
107 ----- |
| 0 | 108 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
109 .. class:: infomark |
| 0 | 110 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
111 **Cite** |
|
14
47d2bf691908
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
112 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
113 | The chemfp_ project |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
114 | |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
115 | N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison - `Open Babel: An open chemical toolbox`_ |
| 0 | 116 |
|
15
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
117 .. _`Open Babel: An open chemical toolbox`: http://www.jcheminf.com/content/3/1/33 |
|
b4eaf75cac77
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
14
diff
changeset
|
118 |
| 0 | 119 |
| 120 </help> | |
| 121 </tool> |
