heteroplasmy_workflow: Galaxy-Workflow-mt_analysis_0.01_strand-specific_(fastq

comparison Galaxy-Workflow-mt_analysis_0.01_strand-specific_(fastq_double).ga @ 0:39ec6ecd0e3b default tip

Uploaded the workflow

author	greg
date	Thu, 27 Oct 2011 09:19:13 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:39ec6ecd0e3b
+{
+"a_galaxy_workflow": "true",
+"annotation": "",
+"format-version": "0.1",
+"name": "mt analysis 0.01 strand-specific (fastq double)",
+"steps": {
+"0": {
+"annotation": "The second input dataset. Usually the second PCR replicate for a given sample.",
+"id": 0,
+"input_connections": {},
+"inputs": [],
+"name": "Map with BWA",
+"outputs": [
+{
+"name": "output",
+"type": "sam"
+}
+],
+"position": {
+"left": 229,
+"top": 388
+},
+"tool_errors": null,
+"tool_id": "bwa_wrapper",
+"tool_state": "{\"genomeSource\": \"{\\\"indices\\\": \\\"/mnt/galaxyIndices/hg19/bwa/base/hg19\\\", \\\"refGenomeSource\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"paired\": \"{\\\"sPaired\\\": \\\"single\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"True\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.3",
+"type": "tool",
+"user_outputs": []
+},
+"1": {
+"annotation": "The first input dataset. Usually the first PCR replicate for a given sample.",
+"id": 1,
+"input_connections": {},
+"inputs": [],
+"name": "Map with BWA",
+"outputs": [
+{
+"name": "output",
+"type": "sam"
+}
+],
+"position": {
+"left": 229,
+"top": 154
+},
+"tool_errors": null,
+"tool_id": "bwa_wrapper",
+"tool_state": "{\"genomeSource\": \"{\\\"indices\\\": \\\"/mnt/galaxyIndices/hg19/bwa/base/hg19\\\", \\\"refGenomeSource\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"paired\": \"{\\\"sPaired\\\": \\\"single\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"True\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.3",
+"type": "tool",
+"user_outputs": []
+},
+"2": {
+"annotation": "Here SAM datasets generated by BWA are merged together",
+"id": 2,
+"input_connections": {
+"input1": {
+"id": 1,
+"output_name": "output"
+},
+"queries_0|input2": {
+"id": 0,
+"output_name": "output"
+}
+},
+"inputs": [],
+"name": "Concatenate queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 410,
+"top": 257
+},
+"tool_errors": null,
+"tool_id": "cat1",
+"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"3": {
+"annotation": "By selecting only lines containing the X0:i:1 tag we eliminate reads that map multiple times. Thus this step removes reads that map more than once.",
+"id": 3,
+"input_connections": {
+"input": {
+"id": 2,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Select",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 492,
+"top": 468
+},
+"tool_errors": null,
+"tool_id": "Grep1",
+"tool_state": "{\"__page__\": 0, \"input\": \"null\", \"invert\": \"\\\"false\\\"\", \"pattern\": \"\\\"X0:i:1\\\"\"}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"4": {
+"annotation": "Selecting reads mapping to the PLUS strand",
+"id": 4,
+"input_connections": {
+"input1": {
+"id": 3,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter SAM",
+"outputs": [
+{
+"name": "out_file1",
+"type": "sam"
+}
+],
+"position": {
+"left": 333,
+"top": 610
+},
+"tool_errors": null,
+"tool_id": "sam_bw_filter",
+"tool_state": "{\"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"input1\": \"null\", \"bits\": \"[{\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 0, \\\"flags\\\": \\\"--0x0004\\\"}, {\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 1, \\\"flags\\\": \\\"--0x0010\\\"}]\", \"__page__\": 0}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"5": {
+"annotation": "Selecting reads mapping to the MINUS strand",
+"id": 5,
+"input_connections": {
+"input1": {
+"id": 3,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter SAM",
+"outputs": [
+{
+"name": "out_file1",
+"type": "sam"
+}
+],
+"position": {
+"left": 271,
+"top": 879
+},
+"tool_errors": null,
+"tool_id": "sam_bw_filter",
+"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"bits\": \"[{\\\"states\\\": \\\"0\\\", \\\"__index__\\\": 0, \\\"flags\\\": \\\"--0x0004\\\"}, {\\\"states\\\": \\\"1\\\", \\\"__index__\\\": 1, \\\"flags\\\": \\\"--0x0010\\\"}]\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"6": {
+"annotation": "Converting SAM to its binary representation (BAM) to allow pileup generation",
+"id": 6,
+"input_connections": {
+"source|input1": {
+"id": 4,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "SAM-to-BAM",
+"outputs": [
+{
+"name": "output1",
+"type": "bam"
+}
+],
+"position": {
+"left": 438,
+"top": 785
+},
+"tool_errors": null,
+"tool_id": "sam_to_bam",
+"tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"cached\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"7": {
+"annotation": "Converting SAM to its binary representation (BAM) to allow pileup generation",
+"id": 7,
+"input_connections": {
+"source|input1": {
+"id": 5,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "SAM-to-BAM",
+"outputs": [
+{
+"name": "output1",
+"type": "bam"
+}
+],
+"position": {
+"left": 404,
+"top": 1043
+},
+"tool_errors": null,
+"tool_id": "sam_to_bam",
+"tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"cached\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"8": {
+"annotation": "Generating standard 6 column pileup without MAQ consensus option for PLUS strand",
+"id": 8,
+"input_connections": {
+"refOrHistory|input1": {
+"id": 6,
+"output_name": "output1"
+}
+},
+"inputs": [],
+"name": "Generate pileup",
+"outputs": [
+{
+"name": "output1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 643,
+"top": 756
+},
+"tool_errors": null,
+"tool_id": "sam_pileup",
+"tool_state": "{\"__page__\": 0, \"c\": \"{\\\"consensus\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"indels\": \"\\\"no\\\"\", \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"lastCol\": \"\\\"no\\\"\", \"mapCap\": \"\\\"60\\\"\"}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"9": {
+"annotation": "Generating standard 6 column pileup without MAQ consensus option for MINUS strand",
+"id": 9,
+"input_connections": {
+"refOrHistory|input1": {
+"id": 7,
+"output_name": "output1"
+}
+},
+"inputs": [],
+"name": "Generate pileup",
+"outputs": [
+{
+"name": "output1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 630,
+"top": 1039
+},
+"tool_errors": null,
+"tool_id": "sam_pileup",
+"tool_state": "{\"__page__\": 0, \"c\": \"{\\\"consensus\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"indels\": \"\\\"no\\\"\", \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"lastCol\": \"\\\"no\\\"\", \"mapCap\": \"\\\"60\\\"\"}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"10": {
+"annotation": "PLUS strand. Here pileup is filtered to restrict the dataset to positions covered by at least 100 reads containing high quality bases (phred 30 or above).",
+"id": 10,
+"input_connections": {
+"input": {
+"id": 8,
+"output_name": "output1"
+}
+},
+"inputs": [],
+"name": "Filter pileup",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 774,
+"top": 532
+},
+"tool_errors": null,
+"tool_id": "pileup_parser",
+"tool_state": "{\"snps_only\": \"\\\"No\\\"\", \"__page__\": 0, \"qc_base\": \"\\\"No\\\"\", \"cvrg_cutoff\": \"\\\"100\\\"\", \"interval\": \"\\\"Yes\\\"\", \"pileup_type\": \"{\\\"type_select\\\": \\\"six\\\", \\\"__current_case__\\\": 1}\", \"input\": \"null\", \"diff\": \"\\\"Yes\\\"\", \"qv_cutoff\": \"\\\"30\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\"}",
+"tool_version": "1.0.2",
+"type": "tool",
+"user_outputs": []
+},
+"11": {
+"annotation": "MINUS strand. Here pileup is filtered to restrict the dataset to positions covered by at least 100 reads containing high quality bases (phred 30 or above).",
+"id": 11,
+"input_connections": {
+"input": {
+"id": 9,
+"output_name": "output1"
+}
+},
+"inputs": [],
+"name": "Filter pileup",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 737,
+"top": 1228
+},
+"tool_errors": null,
+"tool_id": "pileup_parser",
+"tool_state": "{\"snps_only\": \"\\\"No\\\"\", \"__page__\": 0, \"qc_base\": \"\\\"No\\\"\", \"cvrg_cutoff\": \"\\\"100\\\"\", \"interval\": \"\\\"Yes\\\"\", \"pileup_type\": \"{\\\"type_select\\\": \\\"six\\\", \\\"__current_case__\\\": 1}\", \"input\": \"null\", \"diff\": \"\\\"Yes\\\"\", \"qv_cutoff\": \"\\\"30\\\"\"}",
+"tool_version": "1.0.2",
+"type": "tool",
+"user_outputs": []
+},
+"12": {
+"annotation": "PLUS strand. Removing all non mtDNA positions from the dataset (as mtDNA was enriched by PCR there is some genomic DNA contamination which is being removed here)",
+"id": 12,
+"input_connections": {
+"input": {
+"id": 10,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 885,
+"top": 233
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c1=='chrM' and c10 >= 100\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"input\": \"null\"}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"13": {
+"annotation": "MINUS strand. Removing all non mtDNA positions from the dataset (as mtDNA was enriched by PCR there is some genomic DNA contamination which is being removed here)",
+"id": 13,
+"input_connections": {
+"input": {
+"id": 11,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 889,
+"top": 1445
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c1=='chrM' and c10 >= 100\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"14": {
+"annotation": "PLUS strand. Filtering all positions where frequency of G allele (c8 is the number of Gs, c10 is the coverage, c8/c10 = frequency of Gs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 14,
+"input_connections": {
+"input": {
+"id": 12,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1130,
+"top": 434
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"15": {
+"annotation": "PLUS strand. Filtering all positions where frequency of C allele (c7 is the number of Cs, c10 is the coverage, c7/c10 = frequency of Cs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 15,
+"input_connections": {
+"input": {
+"id": 12,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1134,
+"top": 303
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"16": {
+"annotation": "PLUS strand. Filtering all positions where frequency of T allele (c9 is the number of Ts, c10 is the coverage, c9/c10 = frequency of Ts) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 16,
+"input_connections": {
+"input": {
+"id": 12,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1134,
+"top": 566
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"17": {
+"annotation": "PLUS strand. Filtering all positions where frequency of A allele (c6 is the number of As, c10 is the coverage, c6/c10 = frequency of As) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 17,
+"input_connections": {
+"input": {
+"id": 12,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1129,
+"top": 175
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"18": {
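+"annotation": "MINUS strand. Filtering all positions where frequency of G allele (c8 is the number of Gs, c10 is the coverage, c8/c10 = frequency of Gs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",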
+"id": 18,
+"input_connections": {
+"input": {
+"id": 13,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1135,
+"top": 1354
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"19": {
+"annotation": "MINUS strand. Filtering all positions where frequency of A allele (c6 is the number of As, c10 is the coverage, c6/c10 = frequency of As) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 19,
+"input_connections": {
+"input": {
+"id": 13,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1136,
+"top": 1105
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"20": {
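+"annotation": "MINUS strand. Filtering all positions where frequency of T allele (c9 is the number of Ts, c10 is the coverage, c9/c10 = frequency of Ts) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",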
+"id": 20,
+"input_connections": {
+"input": {
+"id": 13,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1128,
+"top": 1449
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"21": {
+"annotation": "MINUS strand. Filtering all positions where frequency of C allele (c7 is the number of Cs, c10 is the coverage, c7/c10 = frequency of Cs) is higher than 1%. The second expression (c11/c10) verifies that the position contains a variant (c11 contains the total number of variants computed by \"Filter Pileup\" tool).",
+"id": 21,
+"input_connections": {
+"input": {
+"id": 13,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1128,
+"top": 1233
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7/c10 >= 0.01 and c11/c10 >= 0.01\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"22": {
+"annotation": "PLUS strand. Results of the four filtering steps are concatenated. If a position has two alleles with frequency above 1% it will be listed twice in this dataset. The next tool (\"Group\") takes advantage of this.",
+"id": 22,
+"input_connections": {
+"input1": {
+"id": 17,
+"output_name": "out_file1"
+},
+"queries_0|input2": {
+"id": 15,
+"output_name": "out_file1"
+},
+"queries_1|input2": {
+"id": 14,
+"output_name": "out_file1"
+},
+"queries_2|input2": {
+"id": 16,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Concatenate queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1394,
+"top": 636
+},
+"tool_errors": null,
+"tool_id": "cat1",
+"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}, {\\\"input2\\\": null, \\\"__index__\\\": 1}, {\\\"input2\\\": null, \\\"__index__\\\": 2}]\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"23": {
+"annotation": "MINUS strand. Results of the four filtering steps are concatenated. If a position has two alleles with frequency above 1% it will be listed twice in this dataset. The next tool (\"Group\") takes advantage of this.",
+"id": 23,
+"input_connections": {
+"input1": {
+"id": 19,
+"output_name": "out_file1"
+},
+"queries_0|input2": {
+"id": 21,
+"output_name": "out_file1"
+},
+"queries_1|input2": {
+"id": 18,
+"output_name": "out_file1"
+},
+"queries_2|input2": {
+"id": 20,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Concatenate queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1397,
+"top": 862
+},
+"tool_errors": null,
+"tool_id": "cat1",
+"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}, {\\\"input2\\\": null, \\\"__index__\\\": 1}, {\\\"input2\\\": null, \\\"__index__\\\": 2}]\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"24": {
+"annotation": "PLUS strand. The previous tool outputs alleles with frequency above 1%. If a position has two alleles (i.e., a heteroplasmy), it will be listed twice. Group tool counts the individual positions and outputs them in the form \"position[tab]count\"",
+"id": 24,
+"input_connections": {
+"input1": {
+"id": 22,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Group",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 1448,
+"top": 464
+},
+"tool_errors": null,
+"tool_id": "Grouping1",
+"tool_state": "{\"ignorecase\": \"\\\"False\\\"\", \"groupcol\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"input1\": \"null\", \"operations\": \"[{\\\"opcol\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"yes\\\"}]\", \"__page__\": 0}",
+"tool_version": "1.9.1",
+"type": "tool",
+"user_outputs": []
+},
+"25": {
+"annotation": "MINUS strand. The previous tool outputs alleles with frequency above 1%. If a position has two alleles (i.e., a heteroplasmy), it will be listed twice. Group tool counts the individual positions and outputs them in the form \"position[tab]count\"",
+"id": 25,
+"input_connections": {
+"input1": {
+"id": 23,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Group",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 1490,
+"top": 1104
+},
+"tool_errors": null,
+"tool_id": "Grouping1",
+"tool_state": "{\"ignorecase\": \"\\\"False\\\"\", \"groupcol\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"input1\": \"null\", \"operations\": \"[{\\\"opcol\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"yes\\\"}]\", \"__page__\": 0}",
+"tool_version": "1.9.1",
+"type": "tool",
+"user_outputs": []
+},
+"26": {
+"annotation": "PLUS strand. Here the workflow joins the original dataset with the output of the group tools. As a result the output contains the original data plus the count of allelic variants at that site. If there are no allelic variants at a site, the missing column is filled with \"0\" as specified by \"Fill columns by\" option of the Join tool.",
+"id": 26,
+"input_connections": {
+"input1": {
+"id": 12,
+"output_name": "out_file1"
+},
+"input2": {
+"id": 24,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Join two Queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1505,
+"top": 282
+},
+"tool_errors": null,
+"tool_id": "join1",
+"tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"fill_empty\\\", \\\"do_fill_empty_columns\\\": {\\\"column_fill_type\\\": \\\"single_fill_value\\\", \\\"fill_value\\\": \\\"0\\\", \\\"__current_case__\\\": 0}, \\\"fill_columns_by\\\": \\\"fill_unjoined_only\\\", \\\"__current_case__\\\": 1}\", \"unmatched\": \"\\\"-u\\\"\", \"input1\": \"null\"}",
+"tool_version": "2.0.2",
+"type": "tool",
+"user_outputs": []
+},
+"27": {
+"annotation": "MINUS strand. Here the workflow joins the original dataset with the output of the group tools. As a result the output contains the original data plus the count of allelic variants at that site. If there are no allelic variants at a site, the missing column is filled with \"0\" as specified by \"Fill columns by\" option of the Join tool.",
+"id": 27,
+"input_connections": {
+"input1": {
+"id": 13,
+"output_name": "out_file1"
+},
+"input2": {
+"id": 25,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Join two Queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1538,
+"top": 1223
+},
+"tool_errors": null,
+"tool_id": "join1",
+"tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"fill_empty\\\", \\\"do_fill_empty_columns\\\": {\\\"column_fill_type\\\": \\\"single_fill_value\\\", \\\"fill_value\\\": \\\"0\\\", \\\"__current_case__\\\": 0}, \\\"fill_columns_by\\\": \\\"fill_unjoined_only\\\", \\\"__current_case__\\\": 1}\", \"unmatched\": \"\\\"-u\\\"\", \"input1\": \"null\"}",
+"tool_version": "2.0.2",
+"type": "tool",
+"user_outputs": []
+},
+"28": {
+"annotation": "PLUS strand. Removing unnecessary columns from the data.",
+"id": 28,
+"input_connections": {
+"input": {
+"id": 26,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Cut",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 1690,
+"top": 706
+},
+"tool_errors": null,
+"tool_id": "Cut1",
+"tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c6,c7,c8,c9,c10,c11,c13\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"29": {
+"annotation": "MINUS strand. Removing unnecessary columns from the data.",
+"id": 29,
+"input_connections": {
+"input": {
+"id": 27,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Cut",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 1677,
+"top": 823
+},
+"tool_errors": null,
+"tool_id": "Cut1",
+"tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c6,c7,c8,c9,c10,c11,c13\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"30": {
+"annotation": "Joining PLUS and MINUS strands side by side.",
+"id": 30,
+"input_connections": {
+"input1": {
+"id": 28,
+"output_name": "out_file1"
+},
+"input2": {
+"id": 29,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Join two Queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1786,
+"top": 195
+},
+"tool_errors": null,
+"tool_id": "join1",
+"tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\"}",
+"tool_version": "2.0.2",
+"type": "tool",
+"user_outputs": []
+},
+"31": {
+"annotation": "Validating strandedness of variants. We require true variants to be validated on both strands. Columns 11 and 22 in the input dataset contain counts of variants computed with the Group tool three steps earlier (\"Group\" step for PLUS and MINUS strand). If the counts are identical (c11 == c22) the tool will return \"True\". Otherwise it will return \"False\".",
+"id": 31,
+"input_connections": {
+"input": {
+"id": 30,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1896,
+"top": 369
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c11 == c22\\\"\", \"round\": \"\\\"no\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"32": {
+"annotation": "Filtering sites where variants did not validate",
+"id": 32,
+"input_connections": {
+"input": {
+"id": 31,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2105,
+"top": 404
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c23=='False'\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"33": {
+"annotation": "Filtering all True variants",
+"id": 33,
+"input_connections": {
+"input": {
+"id": 31,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Filter",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2108,
+"top": 168
+},
+"tool_errors": null,
+"tool_id": "Filter1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c23=='True'\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+},
+"34": {
+"annotation": "For sites that do not contain true variants we force the value of new column to \"0\".",
+"id": 34,
+"input_connections": {
+"input": {
+"id": 32,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Add column",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1864,
+"top": 1247
+},
+"tool_errors": null,
+"tool_id": "addValue",
+"tool_state": "{\"__page__\": 0, \"input\": \"null\", \"exp\": \"\\\"0\\\"\", \"iterate\": \"\\\"no\\\"\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"35": {
+"annotation": "For True variants we add additional column duplicating column c22, which contains the number of true variants at a site.",
+"id": 35,
+"input_connections": {
+"input": {
+"id": 33,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 1861,
+"top": 1082
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c22\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"36": {
+"annotation": "Concatenating true variants and other sites tail-to-head.",
+"id": 36,
+"input_connections": {
+"input1": {
+"id": 35,
+"output_name": "out_file1"
+},
+"queries_0|input2": {
+"id": 34,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Concatenate queries",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2073,
+"top": 1156
+},
+"tool_errors": null,
+"tool_id": "cat1",
+"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}",
+"tool_version": "1.0.0",
+"type": "tool",
+"user_outputs": []
+},
+"37": {
+"annotation": "Because the input dataset contains plus and minus strand data side by side (remember we joined PLUS and MINUS five steps ago using \"Join two Queries\" tool), we need to combine allelic counts by summing up all As (column 5 in PLUS and column 16 in MINUS)",
+"id": 37,
+"input_connections": {
+"input": {
+"id": 36,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2137,
+"top": 1030
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c5+c16\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"38": {
+"annotation": "... summing up all Cs (column 6 in PLUS and column 17 in MINUS)",
+"id": 38,
+"input_connections": {
+"input": {
+"id": 37,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2185,
+"top": 936
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c6+c17\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"39": {
+"annotation": "... summing up all Gs (column 7 in PLUS and column 18 in MINUS)",
+"id": 39,
+"input_connections": {
+"input": {
+"id": 38,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2221,
+"top": 831
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c7+c18\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"40": {
+"annotation": "... summing up all Ts (column 8 in PLUS and column 19 in MINUS)",
+"id": 40,
+"input_connections": {
+"input": {
+"id": 39,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2289,
+"top": 716
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c8+c19\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"41": {
+"annotation": "... summing up coverage (column 9 in PLUS and column 20 in MINUS)",
+"id": 41,
+"input_connections": {
+"input": {
+"id": 40,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Compute",
+"outputs": [
+{
+"name": "out_file1",
+"type": "input"
+}
+],
+"position": {
+"left": 2344,
+"top": 598
+},
+"tool_errors": null,
+"tool_id": "Add_a_column1",
+"tool_state": "{\"input\": \"null\", \"cond\": \"\\\"c9+c20\\\"\", \"round\": \"\\\"yes\\\"\", \"__page__\": 0}",
+"tool_version": "1.1.0",
+"type": "tool",
+"user_outputs": []
+},
+"42": {
+"annotation": "Finally, we clean up all unnecessary columns and generate a combined dataset that will contain 10 columns:\n[1] Chromosome\n[2] Start\n[3] End\n[4] Base in the reference genome\n[5] Number of As\n[6] Number of Cs\n[7] Number of Gs\n[8] Number of Ts\n[9] Coverage at that position\n[10] Number of variants (differences from reference above 1% level).",
+"id": 42,
+"input_connections": {
+"input": {
+"id": 41,
+"output_name": "out_file1"
+}
+},
+"inputs": [],
+"name": "Cut",
+"outputs": [
+{
+"name": "out_file1",
+"type": "tabular"
+}
+],
+"position": {
+"left": 2439,
+"top": 1185
+},
+"tool_errors": null,
+"tool_id": "Cut1",
+"tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c25,c26,c27,c28,c29,c24\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__page__\": 0}",
+"tool_version": "1.0.1",
+"type": "tool",
+"user_outputs": []
+}
+}
+}

Mercurial > repos > greg > heteroplasmy_workflow

comparison Galaxy-Workflow-mt_analysis_0.01_strand-specific_(fastq_double).ga @ 0:39ec6ecd0e3b default tip