Mercurial > repos > greg > genotype_population_info

--- a/genotype_population_info.py	Fri Nov 09 14:16:37 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-"""
-Generate the genotype_population_info.txt file by parsing the information
-from an Affymetrix 96 well plate CSV file and an associated VCF file.
-"""
-import argparse
-import sys
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--input_csv', dest='input_csv', help='Affymetrix 96 well plate file')
-parser.add_argument('--input_vcf', dest='input_vcf', help='Input VCF file')
-parser.add_argument('--output', dest='output', help='Output dataset')
-args = parser.parse_args()
-
-# Collect the sample ids from the first VCF line that starts with "#CHROM".
-sample_list = None
-with open(args.input_vcf, "r") as vcfh:
-    for line in vcfh:
-        if line.startswith("#CHROM"):
-            # Example line:
-            # #CHROM  13704   13706   13708   13736   13748   13762   13782
-            # NOTE(review): a standard VCF header has FORMAT at index 8, so this
-            # slice may keep it as the first entry -- confirm [9:] was not intended.
-            sample_list = line.rstrip("\r\n").split("\t")[8:]
-            break
-if sample_list is None:
-    # Without the header line there are no samples; stop with a clear message.
-    sys.exit("No line starting with #CHROM was found in the input VCF file.")
-
-# Parse the input_csv file to get the region for each sample_id in the
-# sample_list.  region_list starts as a copy of sample_list so both have the
-# same length; a sample with no CSV row keeps its own id as its region.
-region_list = list(sample_list)
-with open(args.input_csv, "r") as csvh:
-    for i, line in enumerate(csvh):
-        items = line.rstrip('\r\n').split(',')
-        # Skip the header and rows too short to hold the region column.
-        if i == 0 or len(items) < 10:
-            continue
-        csv_sample_id = items[0]
-        csv_region = items[9]
-        # Only samples that are present in the VCF get a region.
-        try:
-            loc = sample_list.index(csv_sample_id)
-        except ValueError:
-            continue
-        region_list[loc] = csv_region
-
-# The output file will consist of columns:
-# Item #, Sample ID, Region
-with open(args.output, "w") as outfh:
-    for i, (sample_id, region) in enumerate(zip(sample_list, region_list)):
-        outfh.write("%d\t%s\t%s\n" % (i, sample_id, region))
--- a/genotype_population_info.xml	Fri Nov 09 14:16:37 2018 -0500
+++ b/genotype_population_info.xml	Tue Nov 20 15:12:30 2018 -0500
@@ -1,14 +1,14 @@
 <tool id="genotype_population_info" name="Generate genotype population info" version="1.0.0">
-    <description>from Affymetrix data</description>
+    <description>from VCF data</description>
     <command detect_errors="exit_code"><![CDATA[
-python '$__tool_directory__/genotype_population_info.py'
---input_csv '$input_csv'
---input_vcf '$input_vcf'
---output '$output']]></command>
+#set header = 'header.txt'
+#set samples = 'samples.txt'
+grep -m 1 "^#CHROM" '$input_vcf' > $header &&
+tr '\t' '\n' < $header > $samples &&
+sed -i 1,9d $samples &&
+awk -v OFS='\t' '{print NR, $0}' $samples > '$output'
+]]></command>
     <inputs>
-        <param name="input_csv" type="data" format="csv" label="Affymetrix 96 well plate CSV file">
-            <validator type="expression" message="96 well plate data must have 31 columns and 96 rows">value is not None and value.metadata.columns==31 and value.metadata.data_lines==96</validator>
-        </param>
         <param name="input_vcf" type="data" format="vcf" label="VCF file"/>
     </inputs>
     <outputs>
@@ -16,8 +16,7 @@
     </outputs>
     <tests>
         <test>
-            <param name="input_csv" value="96_well_plate.csv" ftype="csv"/>
-            <param name="input_vcf" value="baitssnv.recode.vcf" ftype="vcf"/>
+            <param name="input_vcf" value="input.vcf" ftype="vcf"/>
             <output name="output" file="output.tabular" ftype="tabular"/>
         </test>
     </tests>
@@ -26,10 +25,7 @@

 Generates a file that contains the genotype population information that can be used as input
 to the multilocus_genotype tool.  This tool can be used only within a Galaxy instance which
-includes the complementaty stag database.
------
-
-**Required options**
+includes the complementary stag database.
     </help>
     <citations>
     </citations>