comparison Galaxy-Workflow-mt_analysis_0.01_strand-specific_(fastq_double).ga @ 0:39ec6ecd0e3b default tip

Uploaded the workflow
author greg
date Thu, 27 Oct 2011 09:19:13 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:39ec6ecd0e3b
1 {
2 "a_galaxy_workflow": "true",
3 "annotation": "",
4 "format-version": "0.1",
5 "name": "mt analysis 0.01 strand-specific (fastq double)",
6 "steps": {
7 "0": {
8 "annotation": "The second input dataset. Usually the second PCR replicate for a given sample.",
9 "id": 0,
10 "input_connections": {},
11 "inputs": [],
12 "name": "Map with BWA",
13 "outputs": [
14 {
15 "name": "output",
16 "type": "sam"
17 }
18 ],
19 "position": {
20 "left": 229,
21 "top": 388
22 },
23 "tool_errors": null,
24 "tool_id": "bwa_wrapper",
25 "tool_state": "{\"genomeSource\": \"{\\\"indices\\\": \\\"/mnt/galaxyIndices/hg19/bwa/base/hg19\\\", \\\"refGenomeSource\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"paired\": \"{\\\"sPaired\\\": \\\"single\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"True\\\"\", \"__page__\": 0}",
26 "tool_version": "1.0.3",
27 "type": "tool",
28 "user_outputs": []
29 },
30 "1": {
31 "annotation": "The first input dataset. Usually the first PCR replicate for a given sample.",
32 "id": 1,
33 "input_connections": {},
34 "inputs": [],
35 "name": "Map with BWA",
36 "outputs": [
37 {
38 "name": "output",
39 "type": "sam"
40 }
41 ],
42 "position": {
43 "left": 229,
44 "top": 154
45 },
46 "tool_errors": null,
47 "tool_id": "bwa_wrapper",
48 "tool_state": "{\"genomeSource\": \"{\\\"indices\\\": \\\"/mnt/galaxyIndices/hg19/bwa/base/hg19\\\", \\\"refGenomeSource\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"paired\": \"{\\\"sPaired\\\": \\\"single\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"True\\\"\", \"__page__\": 0}",
49 "tool_version": "1.0.3",
50 "type": "tool",
51 "user_outputs": []
52 },
53 "2": {
54 "annotation": "Here SAM datasets generated by BWA are merged together",
55 "id": 2,
56 "input_connections": {
57 "input1": {
58 "id": 1,
59 "output_name": "output"
60 },
61 "queries_0|input2": {
62 "id": 0,
63 "output_name": "output"
64 }
65 },
66 "inputs": [],
67 "name": "Concatenate queries",
68 "outputs": [
69 {
70 "name": "out_file1",
71 "type": "input"
72 }
73 ],
74 "position": {
75 "left": 410,
76 "top": 257
77 },
78 "tool_errors": null,
79 "tool_id": "cat1",
80 "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}",
81 "tool_version": "1.0.0",
82 "type": "tool",
83 "user_outputs": []
84 },
85 "3": {
86 "annotation": "By selecting only lines containing the X0:i:1 tag we eliminate reads that map multiple times. Thus this step removes reads that map more than once.",
87 "id": 3,
88 "input_connections": {
89 "input": {
90 "id": 2,
91 "output_name": "out_file1"
92 }
93 },
94 "inputs": [],
95 "name": "Select",
96 "outputs": [
97 {
98 "name": "out_file1",
99 "type": "input"
100 }
101 ],
102 "position": {
103 "left": 492,
104 "top": 468
105 },
106 "tool_errors": null,
107 "tool_id": "Grep1",
108 "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"invert\": \"\\\"false\\\"\", \"pattern\": \"\\\"X0:i:1\\\"\"}",
109 "tool_version": "1.0.1",
110 "type": "tool",
111 "user_outputs": []
112 },
113 "4": {
114 "annotation": "Selecting reads mapping to the PLUS strand",
115 "id": 4,
116 "input_connections": {
117 "input1": {
118 "id": 3,
119 "output_name": "out_file1"
120 }
121 },
122 "inputs": [],
123 "name": "Filter SAM",
124 "outputs": [
125 {
126 "name": "out_file1",
127 "type": "sam"
128 }
129 ],
130 "position": {
131 "left": 333,
132 "top": 610
133 },
134 "tool_errors": null,
135 "tool_id": "sam_bw_filter",
136 "tool_state": "{\"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"input1\": \"null\", \"bits\": \"[{\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 0, \\\"flags\\\": \\\"--0x0004\\\"}, {\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 1, \\\"flags\\\": \\\"--0x0010\\\"}]\", \"__page__\": 0}",
137 "tool_version": "1.0.0",
138 "type": "tool",
139 "user_outputs": []
140 },
141 "5": {
142 "annotation": "Selecting reads mapping to the MINUS strand",
143 "id": 5,
144 "input_connections": {
145 "input1": {
146 "id": 3,
147 "output_name": "out_file1"
148 }
149 },
150 "inputs": [],
151 "name": "Filter SAM",
152 "outputs": [
153 {
154 "name": "out_file1",
155 "type": "sam"
156 }
157 ],
158 "position": {
159 "left": 271,
160 "top": 879
161 },
162 "tool_errors": null,
163 "tool_id": "sam_bw_filter",
164 "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"bits\": \"[{\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 0, \\\"flags\\\": \\\"--0x0004\\\"}, {\\\"states\\\": \\\"1\\\", \\\"__index__\\\": 1, \\\"flags\\\": \\\"--0x0010\\\"}]\"}",
165 "tool_version": "1.0.0",
166 "type": "tool",
167 "user_outputs": []
168 },
169 "6": {
170 "annotation": "Converting SAM to its binary representation (BAM) to allow pileup generation",
171 "id": 6,
172 "input_connections": {
173 "source|input1": {
174 "id": 4,
175 "output_name": "out_file1"
176 }
177 },
178 "inputs": [],
179 "name": "SAM-to-BAM",
180 "outputs": [
181 {
182 "name": "output1",
183 "type": "bam"
184 }
185 ],
186 "position": {
187 "left": 438,
188 "top": 785
189 },
190 "tool_errors": null,
191 "tool_id": "sam_to_bam",
192 "tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"cached\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"__page__\": 0}",
193 "tool_version": "1.1.0",
194 "type": "tool",
195 "user_outputs": []
196 },
197 "7": {
198 "annotation": "Converting SAM to its binary representation (BAM) to allow pileup generation",
199 "id": 7,
200 "input_connections": {
201 "source|input1": {
202 "id": 5,
203 "output_name": "out_file1"
204 }
205 },
206 "inputs": [],
207 "name": "SAM-to-BAM",
208 "outputs": [
209 {
210 "name": "output1",
211 "type": "bam"
212 }
213 ],
214 "position": {
215 "left": 404,
216 "top": 1043
217 },
218 "tool_errors": null,
219 "tool_id": "sam_to_bam",
220 "tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"cached\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"__page__\": 0}",
221 "tool_version": "1.1.0",
222 "type": "tool",
223 "user_outputs": []
224 },
225 "8": {
226 "annotation": "Generating standard 6 column pileup without MAQ consensus option for PLUS strand",
227 "id": 8,
228 "input_connections": {
229 "refOrHistory|input1": {
230 "id": 6,
231 "output_name": "output1"
232 }
233 },
234 "inputs": [],
235 "name": "Generate pileup",
236 "outputs": [
237 {
238 "name": "output1",
239 "type": "tabular"
240 }
241 ],
242 "position": {
243 "left": 643,
244 "top": 756
245 },
246 "tool_errors": null,
247 "tool_id": "sam_pileup",
248 "tool_state": "{\"__page__\": 0, \"c\": \"{\\\"consensus\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"indels\": \"\\\"no\\\"\", \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"lastCol\": \"\\\"no\\\"\", \"mapCap\": \"\\\"60\\\"\"}",
249 "tool_version": "1.1.0",
250 "type": "tool",
251 "user_outputs": []
252 },
253 "9": {
254 "annotation": "Generating standard 6 column pileup without MAQ consensus option for MINUS strand",
255 "id": 9,
256 "input_connections": {
257 "refOrHistory|input1": {
258 "id": 7,
259 "output_name": "output1"
260 }
261 },
262 "inputs": [],
263 "name": "Generate pileup",
264 "outputs": [
265 {
266 "name": "output1",
267 "type": "tabular"
268 }
269 ],
270 "position": {
271 "left": 630,
272 "top": 1039
273 },
274 "tool_errors": null,
275 "tool_id": "sam_pileup",
276 "tool_state": "{\"__page__\": 0, \"c\": \"{\\\"consensus\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"indels\": \"\\\"no\\\"\", \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"lastCol\": \"\\\"no\\\"\", \"mapCap\": \"\\\"60\\\"\"}",
277 "tool_version": "1.1.0",
278 "type": "tool",
279 "user_outputs": []
280 },
281 "10": {
282 "annotation": "PLUS strand. Here pileup is filtered to restrict the dataset to positions covered by at least 100 reads containing high quality bases (phred 30 or above).",
283 "id": 10,
284 "input_connections": {
285 "input": {
286 "id": 8,
287 "output_name": "output1"
288 }
289 },
290 "inputs": [],
291 "name": "Filter pileup",
292 "outputs": [
293 {
294 "name": "out_file1",
295 "type": "tabular"
296 }
297 ],
298 "position": {
299 "left": 774,
300 "top": 532
301 },
302 "tool_errors": null,
303 "tool_id": "pileup_parser",
304 "tool_state": "{\"snps_only\": \"\\\"No\\\"\", \"__page__\": 0, \"qc_base\": \"\\\"No\\\"\", \"cvrg_cutoff\": \"\\\"100\\\"\", \"interval\": \"\\\"Yes\\\"\", \"pileup_type\": \"{\\\"type_select\\\": \\\"six\\\", \\\"__current_case__\\\": 1}\", \"input\": \"null\", \"diff\": \"\\\"Yes\\\"\", \"qv_cutoff\": \"\\\"30\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\"}",
305 "tool_version": "1.0.2",
306 "type": "tool",
307 "user_outputs": []
308 },
309 "11": {
310 "annotation": "MINUS strand. Here pileup is filtered to restrict the dataset to positions covered by at least 100 reads containing high quality bases (phred 30 or above).",
311 "id": 11,
312 "input_connections": {
313 "input": {
314 "id": 9,
315 "output_name": "output1"
316 }
317 },
318 "inputs": [],
319 "name": "Filter pileup",
320 "outputs": [
321 {
322 "name": "out_file1",
323 "type": "tabular"
324 }
325 ],
326 "position": {
327 "left": 737,
328 "top": 1228
329 },
330 "tool_errors": null,
331 "tool_id": "pileup_parser",
332 "tool_state": "{\"snps_only\": \"\\\"No\\\"\", \"__page__\": 0, \"qc_base\": \"\\\"No\\\"\", \"cvrg_cutoff\": \"\\\"100\\\"\", \"interval\": \"\\\"Yes\\\"\", \"pileup_type\": \"{\\\"type_select\\\": \\\"six\\\", \\\"__current_case__\\\": 1}\", \"input\": \"null\", \"diff\": \"\\\"Yes\\\"\", \"qv_cutoff\": \"\\\"30\\\"\"}",
333 "tool_version": "1.0.2",
334 "type": "tool",
335 "user_outputs": []
336 },
337 "12": {
338 "annotation": "PLUS strand. Removing all non mtDNA positions from the dataset (as mtDNA was enriched by PCR there is some genomic DNA contamination which is being removed here)",
339 "id": 12,
340 "input_connections": {
341 "input": {
342 "id": 10,
343 "output_name": "out_file1"
344 }
345 },
346 "inputs": [],
347 "name": "Filter",
348 "outputs": [
349 {
350 "name": "out_file1",
351 "type": "input"
352 }
353 ],
354 "position": {
355 "left": 885,
356 "top": 233
357 },
358 "tool_errors": null,
359 "tool_id": "Filter1",
360 "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c1=='chrM' and c10 >= 100\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"input\": \"null\"}",
361 "tool_version": "1.0.1",
362 "type": "tool",
363 "user_outputs": []
364 },
365 "13": {
366 "annotation": "MINUS strand. Removing all non mtDNA positions from the dataset (as mtDNA was enriched by PCR there is some genomic DNA contamination which is being removed here)",
367 "id": 13,
368 "input_connections": {
369 "input": {
370 "id": 11,
371 "output_name": "out_file1"
372 }
373 },
374 "inputs": [],
375 "name": "Filter",
376 "outputs": [
377 {
378 "name": "out_file1",
379 "type": "input"
380 }
381 ],
382 "position": {
383 "left": 889,
384 "top": 1445
385 },
386 "tool_errors": null,
387 "tool_id": "Filter1",
388 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c1=='chrM' and c10 >= 100\\\"\", \"__page__\": 0}",
389 "tool_version": "1.0.1",
390 "type": "tool",
391 "user_outputs": []
392 },
393 "14": {
394 "annotation": "PLUS strand. Filtering all positions where frequency of G allele (c8 is the number of Gs, c10 is the coverage, c8/c10 = frequency of Gs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
395 "id": 14,
396 "input_connections": {
397 "input": {
398 "id": 12,
399 "output_name": "out_file1"
400 }
401 },
402 "inputs": [],
403 "name": "Filter",
404 "outputs": [
405 {
406 "name": "out_file1",
407 "type": "input"
408 }
409 ],
410 "position": {
411 "left": 1130,
412 "top": 434
413 },
414 "tool_errors": null,
415 "tool_id": "Filter1",
416 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
417 "tool_version": "1.0.1",
418 "type": "tool",
419 "user_outputs": []
420 },
421 "15": {
422 "annotation": "PLUS strand. Filtering all positions where frequency of C allele (c7 is the number of Cs, c10 is the coverage, c7/c10 = frequency of Cs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
423 "id": 15,
424 "input_connections": {
425 "input": {
426 "id": 12,
427 "output_name": "out_file1"
428 }
429 },
430 "inputs": [],
431 "name": "Filter",
432 "outputs": [
433 {
434 "name": "out_file1",
435 "type": "input"
436 }
437 ],
438 "position": {
439 "left": 1134,
440 "top": 303
441 },
442 "tool_errors": null,
443 "tool_id": "Filter1",
444 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
445 "tool_version": "1.0.1",
446 "type": "tool",
447 "user_outputs": []
448 },
449 "16": {
450 "annotation": "PLUS strand. Filtering all positions where frequency of T allele (c9 is the number of Ts, c10 is the coverage, c9/c10 = frequency of Ts) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
451 "id": 16,
452 "input_connections": {
453 "input": {
454 "id": 12,
455 "output_name": "out_file1"
456 }
457 },
458 "inputs": [],
459 "name": "Filter",
460 "outputs": [
461 {
462 "name": "out_file1",
463 "type": "input"
464 }
465 ],
466 "position": {
467 "left": 1134,
468 "top": 566
469 },
470 "tool_errors": null,
471 "tool_id": "Filter1",
472 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
473 "tool_version": "1.0.1",
474 "type": "tool",
475 "user_outputs": []
476 },
477 "17": {
478 "annotation": "PLUS strand. Filtering all positions where frequency of A allele (c6 is the number of As, c10 is the coverage, c6/c10 = frequency of As) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
479 "id": 17,
480 "input_connections": {
481 "input": {
482 "id": 12,
483 "output_name": "out_file1"
484 }
485 },
486 "inputs": [],
487 "name": "Filter",
488 "outputs": [
489 {
490 "name": "out_file1",
491 "type": "input"
492 }
493 ],
494 "position": {
495 "left": 1129,
496 "top": 175
497 },
498 "tool_errors": null,
499 "tool_id": "Filter1",
500 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
501 "tool_version": "1.0.1",
502 "type": "tool",
503 "user_outputs": []
504 },
505 "18": {
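506 "annotation": "MINUS strand. Filtering all positions where frequency of G allele (c8 is the number of Gs, c10 is the coverage, c8/c10 = frequency of Gs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",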
507 "id": 18,
508 "input_connections": {
509 "input": {
510 "id": 13,
511 "output_name": "out_file1"
512 }
513 },
514 "inputs": [],
515 "name": "Filter",
516 "outputs": [
517 {
518 "name": "out_file1",
519 "type": "input"
520 }
521 ],
522 "position": {
523 "left": 1135,
524 "top": 1354
525 },
526 "tool_errors": null,
527 "tool_id": "Filter1",
528 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
529 "tool_version": "1.0.1",
530 "type": "tool",
531 "user_outputs": []
532 },
533 "19": {
534 "annotation": "MINUS strand. Filtering all positions where frequency of A allele (c6 is the number of As, c10 is the coverage, c6/c10 = frequency of As) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
535 "id": 19,
536 "input_connections": {
537 "input": {
538 "id": 13,
539 "output_name": "out_file1"
540 }
541 },
542 "inputs": [],
543 "name": "Filter",
544 "outputs": [
545 {
546 "name": "out_file1",
547 "type": "input"
548 }
549 ],
550 "position": {
551 "left": 1136,
552 "top": 1105
553 },
554 "tool_errors": null,
555 "tool_id": "Filter1",
556 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
557 "tool_version": "1.0.1",
558 "type": "tool",
559 "user_outputs": []
560 },
561 "20": {
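562 "annotation": "MINUS strand. Filtering all positions where frequency of T allele (c9 is the number of Ts, c10 is the coverage, c9/c10 = frequency of Ts) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",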
563 "id": 20,
564 "input_connections": {
565 "input": {
566 "id": 13,
567 "output_name": "out_file1"
568 }
569 },
570 "inputs": [],
571 "name": "Filter",
572 "outputs": [
573 {
574 "name": "out_file1",
575 "type": "input"
576 }
577 ],
578 "position": {
579 "left": 1128,
580 "top": 1449
581 },
582 "tool_errors": null,
583 "tool_id": "Filter1",
584 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
585 "tool_version": "1.0.1",
586 "type": "tool",
587 "user_outputs": []
588 },
589 "21": {
590 "annotation": "MINUS strand. Filtering all positions where frequency of C allele (c7 is the number of Cs, c10 is the coverage, c7/c10 = frequency of Cs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
591 "id": 21,
592 "input_connections": {
593 "input": {
594 "id": 13,
595 "output_name": "out_file1"
596 }
597 },
598 "inputs": [],
599 "name": "Filter",
600 "outputs": [
601 {
602 "name": "out_file1",
603 "type": "input"
604 }
605 ],
606 "position": {
607 "left": 1128,
608 "top": 1233
609 },
610 "tool_errors": null,
611 "tool_id": "Filter1",
612 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
613 "tool_version": "1.0.1",
614 "type": "tool",
615 "user_outputs": []
616 },
617 "22": {
618 "annotation": "PLUS strand. Results of the four filtering steps are concatenated. If a position has two alleles with frequency above 1% it will be listed twice in this dataset. The next tool (\"Group\") takes advantage of this.",
619 "id": 22,
620 "input_connections": {
621 "input1": {
622 "id": 17,
623 "output_name": "out_file1"
624 },
625 "queries_0|input2": {
626 "id": 15,
627 "output_name": "out_file1"
628 },
629 "queries_1|input2": {
630 "id": 14,
631 "output_name": "out_file1"
632 },
633 "queries_2|input2": {
634 "id": 16,
635 "output_name": "out_file1"
636 }
637 },
638 "inputs": [],
639 "name": "Concatenate queries",
640 "outputs": [
641 {
642 "name": "out_file1",
643 "type": "input"
644 }
645 ],
646 "position": {
647 "left": 1394,
648 "top": 636
649 },
650 "tool_errors": null,
651 "tool_id": "cat1",
652 "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}, {\\\"input2\\\": null, \\\"__index__\\\": 1}, {\\\"input2\\\": null, \\\"__index__\\\": 2}]\"}",
653 "tool_version": "1.0.0",
654 "type": "tool",
655 "user_outputs": []
656 },
657 "23": {
658 "annotation": "MINUS strand. Results of the four filtering steps are concatenated. If a position has two alleles with frequency above 1% it will be listed twice in this dataset. The next tool (\"Group\") takes advantage of this.",
659 "id": 23,
660 "input_connections": {
661 "input1": {
662 "id": 19,
663 "output_name": "out_file1"
664 },
665 "queries_0|input2": {
666 "id": 21,
667 "output_name": "out_file1"
668 },
669 "queries_1|input2": {
670 "id": 18,
671 "output_name": "out_file1"
672 },
673 "queries_2|input2": {
674 "id": 20,
675 "output_name": "out_file1"
676 }
677 },
678 "inputs": [],
679 "name": "Concatenate queries",
680 "outputs": [
681 {
682 "name": "out_file1",
683 "type": "input"
684 }
685 ],
686 "position": {
687 "left": 1397,
688 "top": 862
689 },
690 "tool_errors": null,
691 "tool_id": "cat1",
692 "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}, {\\\"input2\\\": null, \\\"__index__\\\": 1}, {\\\"input2\\\": null, \\\"__index__\\\": 2}]\"}",
693 "tool_version": "1.0.0",
694 "type": "tool",
695 "user_outputs": []
696 },
697 "24": {
698 "annotation": "PLUS strand. The previous tool outputs alleles with frequency above 1%. If a position has two alleles (i.e., a heteroplasmy), it will be listed twice. Group tool counts the individual positions and outputs them in the form \"position[tab]count\"",
699 "id": 24,
700 "input_connections": {
701 "input1": {
702 "id": 22,
703 "output_name": "out_file1"
704 }
705 },
706 "inputs": [],
707 "name": "Group",
708 "outputs": [
709 {
710 "name": "out_file1",
711 "type": "tabular"
712 }
713 ],
714 "position": {
715 "left": 1448,
716 "top": 464
717 },
718 "tool_errors": null,
719 "tool_id": "Grouping1",
720 "tool_state": "{\"ignorecase\": \"\\\"False\\\"\", \"groupcol\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"input1\": \"null\", \"operations\": \"[{\\\"opcol\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"yes\\\"}]\", \"__page__\": 0}",
721 "tool_version": "1.9.1",
722 "type": "tool",
723 "user_outputs": []
724 },
725 "25": {
726 "annotation": "MINUS strand. The previous tool outputs alleles with frequency above 1%. If a position has two alleles (i.e., a heteroplasmy), it will be listed twice. Group tool counts the individual positions and outputs them in the form \"position[tab]count\"",
727 "id": 25,
728 "input_connections": {
729 "input1": {
730 "id": 23,
731 "output_name": "out_file1"
732 }
733 },
734 "inputs": [],
735 "name": "Group",
736 "outputs": [
737 {
738 "name": "out_file1",
739 "type": "tabular"
740 }
741 ],
742 "position": {
743 "left": 1490,
744 "top": 1104
745 },
746 "tool_errors": null,
747 "tool_id": "Grouping1",
748 "tool_state": "{\"ignorecase\": \"\\\"False\\\"\", \"groupcol\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"input1\": \"null\", \"operations\": \"[{\\\"opcol\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"yes\\\"}]\", \"__page__\": 0}",
749 "tool_version": "1.9.1",
750 "type": "tool",
751 "user_outputs": []
752 },
753 "26": {
754 "annotation": "PLUS strand. Here the workflow joins the original dataset with the output of the group tools. As a result the output contains the original data plus the count of allelic variants at that site. If there are no allelic variants at a site, the missing column is filled with \"0\" as specified by \"Fill columns by\" option of the Join tool.",
755 "id": 26,
756 "input_connections": {
757 "input1": {
758 "id": 12,
759 "output_name": "out_file1"
760 },
761 "input2": {
762 "id": 24,
763 "output_name": "out_file1"
764 }
765 },
766 "inputs": [],
767 "name": "Join two Queries",
768 "outputs": [
769 {
770 "name": "out_file1",
771 "type": "input"
772 }
773 ],
774 "position": {
775 "left": 1505,
776 "top": 282
777 },
778 "tool_errors": null,
779 "tool_id": "join1",
780 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"fill_empty\\\", \\\"do_fill_empty_columns\\\": {\\\"column_fill_type\\\": \\\"single_fill_value\\\", \\\"fill_value\\\": \\\"0\\\", \\\"__current_case__\\\": 0}, \\\"fill_columns_by\\\": \\\"fill_unjoined_only\\\", \\\"__current_case__\\\": 1}\", \"unmatched\": \"\\\"-u\\\"\", \"input1\": \"null\"}",
781 "tool_version": "2.0.2",
782 "type": "tool",
783 "user_outputs": []
784 },
785 "27": {
786 "annotation": "MINUS strand. Here the workflow joins the original dataset with the output of the group tools. As a result the output contains the original data plus the count of allelic variants at that site. If there are no allelic variants at a site, the missing column is filled with \"0\" as specified by \"Fill columns by\" option of the Join tool.",
787 "id": 27,
788 "input_connections": {
789 "input1": {
790 "id": 13,
791 "output_name": "out_file1"
792 },
793 "input2": {
794 "id": 25,
795 "output_name": "out_file1"
796 }
797 },
798 "inputs": [],
799 "name": "Join two Queries",
800 "outputs": [
801 {
802 "name": "out_file1",
803 "type": "input"
804 }
805 ],
806 "position": {
807 "left": 1538,
808 "top": 1223
809 },
810 "tool_errors": null,
811 "tool_id": "join1",
812 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"fill_empty\\\", \\\"do_fill_empty_columns\\\": {\\\"column_fill_type\\\": \\\"single_fill_value\\\", \\\"fill_value\\\": \\\"0\\\", \\\"__current_case__\\\": 0}, \\\"fill_columns_by\\\": \\\"fill_unjoined_only\\\", \\\"__current_case__\\\": 1}\", \"unmatched\": \"\\\"-u\\\"\", \"input1\": \"null\"}",
813 "tool_version": "2.0.2",
814 "type": "tool",
815 "user_outputs": []
816 },
817 "28": {
818 "annotation": "PLUS strand. Removing unnecessary columns from the data.",
819 "id": 28,
820 "input_connections": {
821 "input": {
822 "id": 26,
823 "output_name": "out_file1"
824 }
825 },
826 "inputs": [],
827 "name": "Cut",
828 "outputs": [
829 {
830 "name": "out_file1",
831 "type": "tabular"
832 }
833 ],
834 "position": {
835 "left": 1690,
836 "top": 706
837 },
838 "tool_errors": null,
839 "tool_id": "Cut1",
840 "tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c6,c7,c8,c9,c10,c11,c13\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
841 "tool_version": "1.0.1",
842 "type": "tool",
843 "user_outputs": []
844 },
845 "29": {
846 "annotation": "MINUS strand. Removing unnecessary columns from the data.",
847 "id": 29,
848 "input_connections": {
849 "input": {
850 "id": 27,
851 "output_name": "out_file1"
852 }
853 },
854 "inputs": [],
855 "name": "Cut",
856 "outputs": [
857 {
858 "name": "out_file1",
859 "type": "tabular"
860 }
861 ],
862 "position": {
863 "left": 1677,
864 "top": 823
865 },
866 "tool_errors": null,
867 "tool_id": "Cut1",
868 "tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c6,c7,c8,c9,c10,c11,c13\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
869 "tool_version": "1.0.1",
870 "type": "tool",
871 "user_outputs": []
872 },
873 "30": {
874 "annotation": "Joining PLUS and MINUS strands side by side.",
875 "id": 30,
876 "input_connections": {
877 "input1": {
878 "id": 28,
879 "output_name": "out_file1"
880 },
881 "input2": {
882 "id": 29,
883 "output_name": "out_file1"
884 }
885 },
886 "inputs": [],
887 "name": "Join two Queries",
888 "outputs": [
889 {
890 "name": "out_file1",
891 "type": "input"
892 }
893 ],
894 "position": {
895 "left": 1786,
896 "top": 195
897 },
898 "tool_errors": null,
899 "tool_id": "join1",
900 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\"}",
901 "tool_version": "2.0.2",
902 "type": "tool",
903 "user_outputs": []
904 },
905 "31": {
906 "annotation": "Validating strandedness of variants. We require true variants to be validated on both strands. Columns 11 and 22 in the input dataset contain counts of variants computed with the Group tool three steps earlier (\"Group\" step for PLUS and MINUS strand). If the counts are identical (c11 == c22) the tool will return \"True\". Otherwise it will return \"False\".",
907 "id": 31,
908 "input_connections": {
909 "input": {
910 "id": 30,
911 "output_name": "out_file1"
912 }
913 },
914 "inputs": [],
915 "name": "Compute",
916 "outputs": [
917 {
918 "name": "out_file1",
919 "type": "input"
920 }
921 ],
922 "position": {
923 "left": 1896,
924 "top": 369
925 },
926 "tool_errors": null,
927 "tool_id": "Add_a_column1",
928 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c11 == c22\\\"\", \"round\": \"\\\"no\\\"\", \"__page__\": 0}",
929 "tool_version": "1.1.0",
930 "type": "tool",
931 "user_outputs": []
932 },
933 "32": {
934 "annotation": "Filtering sites where variants did not validate",
935 "id": 32,
936 "input_connections": {
937 "input": {
938 "id": 31,
939 "output_name": "out_file1"
940 }
941 },
942 "inputs": [],
943 "name": "Filter",
944 "outputs": [
945 {
946 "name": "out_file1",
947 "type": "input"
948 }
949 ],
950 "position": {
951 "left": 2105,
952 "top": 404
953 },
954 "tool_errors": null,
955 "tool_id": "Filter1",
956 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c23=='False'\\\"\", \"__page__\": 0}",
957 "tool_version": "1.0.1",
958 "type": "tool",
959 "user_outputs": []
960 },
961 "33": {
962 "annotation": "Filtering all True variants",
963 "id": 33,
964 "input_connections": {
965 "input": {
966 "id": 31,
967 "output_name": "out_file1"
968 }
969 },
970 "inputs": [],
971 "name": "Filter",
972 "outputs": [
973 {
974 "name": "out_file1",
975 "type": "input"
976 }
977 ],
978 "position": {
979 "left": 2108,
980 "top": 168
981 },
982 "tool_errors": null,
983 "tool_id": "Filter1",
984 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c23=='True'\\\"\", \"__page__\": 0}",
985 "tool_version": "1.0.1",
986 "type": "tool",
987 "user_outputs": []
988 },
989 "34": {
990 "annotation": "For sites that do not contain true variants we force the value of new column to \"0\".",
991 "id": 34,
992 "input_connections": {
993 "input": {
994 "id": 32,
995 "output_name": "out_file1"
996 }
997 },
998 "inputs": [],
999 "name": "Add column",
1000 "outputs": [
1001 {
1002 "name": "out_file1",
1003 "type": "input"
1004 }
1005 ],
1006 "position": {
1007 "left": 1864,
1008 "top": 1247
1009 },
1010 "tool_errors": null,
1011 "tool_id": "addValue",
1012 "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"exp\": \"\\\"0\\\"\", \"iterate\": \"\\\"no\\\"\"}",
1013 "tool_version": "1.0.0",
1014 "type": "tool",
1015 "user_outputs": []
1016 },
1017 "35": {
1018 "annotation": "For True variants we add additional column duplicating column c22, which contains the number of true variants at a site.",
1019 "id": 35,
1020 "input_connections": {
1021 "input": {
1022 "id": 33,
1023 "output_name": "out_file1"
1024 }
1025 },
1026 "inputs": [],
1027 "name": "Compute",
1028 "outputs": [
1029 {
1030 "name": "out_file1",
1031 "type": "input"
1032 }
1033 ],
1034 "position": {
1035 "left": 1861,
1036 "top": 1082
1037 },
1038 "tool_errors": null,
1039 "tool_id": "Add_a_column1",
1040 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c22\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1041 "tool_version": "1.1.0",
1042 "type": "tool",
1043 "user_outputs": []
1044 },
1045 "36": {
1046 "annotation": "Concatenating true variants and other sites tail-to-head.",
1047 "id": 36,
1048 "input_connections": {
1049 "input1": {
1050 "id": 35,
1051 "output_name": "out_file1"
1052 },
1053 "queries_0|input2": {
1054 "id": 34,
1055 "output_name": "out_file1"
1056 }
1057 },
1058 "inputs": [],
1059 "name": "Concatenate queries",
1060 "outputs": [
1061 {
1062 "name": "out_file1",
1063 "type": "input"
1064 }
1065 ],
1066 "position": {
1067 "left": 2073,
1068 "top": 1156
1069 },
1070 "tool_errors": null,
1071 "tool_id": "cat1",
1072 "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}",
1073 "tool_version": "1.0.0",
1074 "type": "tool",
1075 "user_outputs": []
1076 },
1077 "37": {
1078 "annotation": "Because the input dataset contains plus and minus strand data side by side (remember we joined PLUS and MINUS five steps ago using \"Join two Queries\" tool), we need to combine allelic counts by summing up all As (column 5 in PLUS and column 16 in MINUS)",
1079 "id": 37,
1080 "input_connections": {
1081 "input": {
1082 "id": 36,
1083 "output_name": "out_file1"
1084 }
1085 },
1086 "inputs": [],
1087 "name": "Compute",
1088 "outputs": [
1089 {
1090 "name": "out_file1",
1091 "type": "input"
1092 }
1093 ],
1094 "position": {
1095 "left": 2137,
1096 "top": 1030
1097 },
1098 "tool_errors": null,
1099 "tool_id": "Add_a_column1",
1100 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c5+c16\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1101 "tool_version": "1.1.0",
1102 "type": "tool",
1103 "user_outputs": []
1104 },
1105 "38": {
1106 "annotation": "... summing up all Cs (column 6 in PLUS and column 17 in MINUS)",
1107 "id": 38,
1108 "input_connections": {
1109 "input": {
1110 "id": 37,
1111 "output_name": "out_file1"
1112 }
1113 },
1114 "inputs": [],
1115 "name": "Compute",
1116 "outputs": [
1117 {
1118 "name": "out_file1",
1119 "type": "input"
1120 }
1121 ],
1122 "position": {
1123 "left": 2185,
1124 "top": 936
1125 },
1126 "tool_errors": null,
1127 "tool_id": "Add_a_column1",
1128 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6+c17\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1129 "tool_version": "1.1.0",
1130 "type": "tool",
1131 "user_outputs": []
1132 },
1133 "39": {
1134 "annotation": "... summing up all Gs (column 7 in PLUS and column 18 in MINUS)",
1135 "id": 39,
1136 "input_connections": {
1137 "input": {
1138 "id": 38,
1139 "output_name": "out_file1"
1140 }
1141 },
1142 "inputs": [],
1143 "name": "Compute",
1144 "outputs": [
1145 {
1146 "name": "out_file1",
1147 "type": "input"
1148 }
1149 ],
1150 "position": {
1151 "left": 2221,
1152 "top": 831
1153 },
1154 "tool_errors": null,
1155 "tool_id": "Add_a_column1",
1156 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7+c18\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1157 "tool_version": "1.1.0",
1158 "type": "tool",
1159 "user_outputs": []
1160 },
1161 "40": {
1162 "annotation": "... summing up all Ts (column 8 in PLUS and column 19 in MINUS)",
1163 "id": 40,
1164 "input_connections": {
1165 "input": {
1166 "id": 39,
1167 "output_name": "out_file1"
1168 }
1169 },
1170 "inputs": [],
1171 "name": "Compute",
1172 "outputs": [
1173 {
1174 "name": "out_file1",
1175 "type": "input"
1176 }
1177 ],
1178 "position": {
1179 "left": 2289,
1180 "top": 716
1181 },
1182 "tool_errors": null,
1183 "tool_id": "Add_a_column1",
1184 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8+c19\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1185 "tool_version": "1.1.0",
1186 "type": "tool",
1187 "user_outputs": []
1188 },
1189 "41": {
1190 "annotation": "... summing up coverage (column 9 in PLUS and column 20 in MINUS)",
1191 "id": 41,
1192 "input_connections": {
1193 "input": {
1194 "id": 40,
1195 "output_name": "out_file1"
1196 }
1197 },
1198 "inputs": [],
1199 "name": "Compute",
1200 "outputs": [
1201 {
1202 "name": "out_file1",
1203 "type": "input"
1204 }
1205 ],
1206 "position": {
1207 "left": 2344,
1208 "top": 598
1209 },
1210 "tool_errors": null,
1211 "tool_id": "Add_a_column1",
1212 "tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9+c20\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
1213 "tool_version": "1.1.0",
1214 "type": "tool",
1215 "user_outputs": []
1216 },
1217 "42": {
1218 "annotation": "Finally, we clean up all unnecessary columns and generate a combined dataset that will contain 10 columns:\n[1] Chromosome\n[2] Start\n[3] End\n[4] Base in the reference genome\n[5] Number of As\n[6] Number of Cs\n[7] Number of Gs\n[8] Number of Ts\n[9] Coverage at that position\n[10] Number of variants (differences from reference above the 1% level).",
1219 "id": 42,
1220 "input_connections": {
1221 "input": {
1222 "id": 41,
1223 "output_name": "out_file1"
1224 }
1225 },
1226 "inputs": [],
1227 "name": "Cut",
1228 "outputs": [
1229 {
1230 "name": "out_file1",
1231 "type": "tabular"
1232 }
1233 ],
1234 "position": {
1235 "left": 2439,
1236 "top": 1185
1237 },
1238 "tool_errors": null,
1239 "tool_id": "Cut1",
1240 "tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c25,c26,c27,c28,c29,c24\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
1241 "tool_version": "1.0.1",
1242 "type": "tool",
1243 "user_outputs": []
1244 }
1245 }
1246 }