comparison simsearch.xml @ 0:b4cbee77896d

Uploaded
author bgruening
date Tue, 26 Mar 2013 14:57:22 -0400
parents
children 512145f2e417
comparison
equal deleted inserted replaced
-1:000000000000 0:b4cbee77896d
1 <tool id="chemfp_simsearch" name="Similarity Search" version="0.1">
2 <description>of fingerprint data sets</description>
3 <!--<parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="10000" shared_inputs="targets,fastsearch" merge_outputs="outfile"></parallelism>--> <!-- NOTE(review): parallelism is disabled; presumably tied to the TODO in the command section about comment lines showing up inside merged results. Confirm before re-enabling. -->
4 <requirements> <!-- External packages the command relies on. NOTE(review): the obabel branch of the command also calls the obabel executable, which is not listed here; confirm it is provided by the chemfp package environment. -->
5 <requirement type="package" version="1.1p1">chemfp</requirement>
6 </requirements>
7 <command>
8 #if $method_opts.method_opts_selector == "chemfp":
9 ## TODO: strip the comment (header) lines from the result; in multi mode many of them appear, including in the middle of the merged result file.
10 simsearch -k $method_opts.k --threshold $method_opts.threshold --in fps --target-format fps -q "${method_opts.query}" "${method_opts.targets}" -o "${outfile}"
11 #else:
12 ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs); a composite datatype is used to provide that.
13 ## OpenBabel is also picky about file extensions and no way was found to state the query file type explicitly.
14 ## Workaround: symlink the query under a temporary name that carries the proper extension. close() runs under #silent so it emits no text, and paths are quoted as in the chemfp branch.
15 #import tempfile
16 #set $temp_file = tempfile.NamedTemporaryFile()
17 #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext)
18 #silent $temp_file.close()
19 ln -s "${method_opts.query}" "${temp_link}";
20 obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&#38;1;
21 rm "${temp_link}"
22 #end if
23 </command>
24 <inputs>
25
26 <conditional name="method_opts"> <!-- Chooses the search backend; each branch declares the parameters its command line reads. -->
27 <param name="method_opts_selector" type="select" label="Type of target database">
28 <option value="chemfp">Chemfp fingerprint file</option>
29 <option value="obabel">OpenBabel Fastsearch Index</option>
30 </param>
31 <when value="chemfp"> <!-- Parameters consumed by the simsearch command line. -->
32 <param name="query" type="data" format="fps" label="query"/>
33 <param name="targets" type="data" format="fps" label="Target database"/>
34 <param name="k" type="text" label="k nearest neighbor" value="all"/>
35 <param name="threshold" type="float" label="threshold" value="0.7"/>
36 </when>
37 <when value="obabel"> <!-- Parameters consumed by the obabel fastsearch command line. -->
38 <param name="query" type="data" format="smi,mol,sdf,inchi" label="query"/>
39 <param name="fastsearch" type="data" format="obfs" label="OpenBabel Fastsearch Index"/>
40 <param name="threshold" type="float" label="threshold" value="0.7"/>
41 </when>
42 </conditional>
43
44 </inputs>
45 <outputs>
46 <data format="tabular" name="outfile"/> <!-- Single result dataset; both command branches write to it. NOTE(review): the obabel branch writes SMILES (-osmi) into it; confirm that tabular is the intended datatype in that mode. -->
47 </outputs>
48 <tests>
49 <test> <!-- Exercises the chemfp branch: one query against targets.fps with k=4 and threshold=0.7. The selector is not set here; presumably the test relies on chemfp being the first option. -->
50 <param name="targets" ftype="fps" value="targets.fps"/>
51 <param name="query" ftype="fps" value="q.fps"/>
52 <param name="k" value='4'/>
53 <param name="threshold" value='0.7'/>
54 <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
55 </test>
56 </tests>
57 <help>
58
59
60 **What it does**
61
62 Runs a similarity search of query fingerprints against a target fingerprint data set and reports the most similar targets for each query.
63
64 -----
65
66 **Example**
67
68 * input::
69
70 - Target Database in FPS format
71
72 #FPS1
73 #num_bits=881
74 #type=CACTVS-E_SCREEN/1.0 extended=2
75 #software=CACTVS/unknown
76 #source=Desktop/3579363516810334491.sdf
77 #date=2012-02-03T13:07:47
78 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960
79 cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
80 00200000040080000010000002000000000000 55169009
81 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960
82 cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e408000000000001
83 00200000040080000010000002000000000000 55079807
84 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960
85 cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
86 00200000040080000010000002000000000000 3153534
87 07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960
88 cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
89 00200000040080000010000002000000000000 55168823
90 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960
91 cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
92 00200000040080000010000002000000000000 55102353
93 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
94 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000
95 000100200000040080000010000002000000000000 55091849
96 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e9
97 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
98 000100200000040080000010000002000000000000 55091752
99 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
100 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
101 000100200000040080000010000002000000000000 55091467
102 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
103 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
104 000100200000040080000010000002000000000000 55091466
105 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
106 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
107 000100200000040080000010000002000000000000 55091416
108 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
109 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
110 000100200000040080000010000002000000000000 6499094
111 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
112 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
113 000100200000040080000010000002000000000000 6485578
114 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e9
115 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
116 000100200000040080000010000002000000000000 6485577
117
118 - query :
119 #FPS1
120 #num_bits=881
121 #type=CACTVS-E_SCREEN/1.0 extended=2
122 #software=CACTVS/unknown
123 #source=CID_28434379.sdf
124 #date=2012-02-03T13:08:39
125 07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9
126 608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
127 000100200000040080000010000002000000000000 28434379
128 - k : 4
129 - threshold : 0.7
130
131 * output::
132
133 #Simsearch/1
134 #num_bits=881
135 #type=Tanimoto k=4 threshold=0.7
136 #software=chemfp/1.0
137 #queries=q.fps
138 #targets=target.fps
139 #query_sources=CID_28434379.sdf
140 #target_sources=Desktop/3579363516810334491.sdf
141 4 28434379 55091752 0.9684 55091466 0.9682 55091416 0.9682 55102353 0.9682
142
143
144 </help>
145 </tool>