extract_genomic_dna: extract_genomic

comparison extract_genomic_dna.py @ 1:311febbd33d6 draft

Uploaded

author	greg
date	Thu, 14 Jan 2016 09:24:51 -0500
parents	cff5b7c9be55
children	c46db4f7c869

comparison

Legend: equal | deleted | inserted | replaced

-:cff5b7c9be55
+:311febbd33d6
 sys.exit(1)
 parser = argparse.ArgumentParser()
 parser.add_option('--input_format', dest='input_format', help="Input dataset format")
 parser.add_option('--input', dest='input', help="Input dataset")
-parser.add_option('--dbkey', dest='dbkey', help="Input dataset genome build")
+parser.add_option('--genome', dest='genome', help="Input dataset genome build")
 parser.add_option('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff")
 parser.add_option('--columns', dest='columns', help="Columns to use in input file")
 parser.add_option('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file")
 parser.add_option('--reference_genome', dest='reference_genome', help="Reference genome file")
 parser.add_option('--output_format', dest='output_format', help="Output format")
 parser.add_option('--output', dest='output', help="Output dataset")
 args = parser.parse_args()
 input_is_gff = args.input_format == 'gff'
-interpret_features = args.interpret_features == "yes"
+interpret_features = input_is_gff and args.interpret_features == "yes"
 if len(args.cols.split(',')) == 5:
 # Bed file.
 chrom_col, start_col, end_col, strand_col, name_col = parse_cols_arg(args.cols)
 else:
 # Gff file.
 else:
 nibs[chrom] = nib = bx.seq.nib.NibFile(file("%s/%s.nib" % (seq_path, chrom)))
 try:
 sequence = nib.get(start, end - start)
 except Exception, e:
-warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. " % (start, end - start, args.dbkey)
+warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. " % (start, end - start, args.genome)
 warnings.append(warning)
 if not invalid_lines:
 invalid_lines = get_lines(feature)
 first_invalid_line = line_count
 skipped_lines += len(invalid_lines)
 continue
-elif os.path.isfile(os.path.join(seq_dir, '%s.2bit' % args.dbkey)):
+elif os.path.isfile(os.path.join(seq_dir, '%s.2bit' % args.genome)):
 if not(twobitfile):
 twobitfile = bx.seq.twobit.TwoBitFile(file(seq_path))
 try:
 if interpret_features:
 # Create sequence from intervals within a feature.
 invalid_lines = get_lines(feature)
 first_invalid_line = line_count
 skipped_lines += len(invalid_lines)
 continue
 else:
-warning = "Chromosome by name '%s' was not found for build '%s'. " % (chrom, args.dbkey)
+warning = "Chromosome by name '%s' was not found for build '%s'. " % (chrom, args.genome)
 warnings.append(warning)
 if not invalid_lines:
 invalid_lines = get_lines(feature)
 first_invalid_line = line_count
 skipped_lines += len(invalid_lines)
 continue
 if sequence == '':
-warning = "Chrom: '%s', start: '%s', end: '%s' is either invalid or not present in build '%s'. " % (chrom, start, end, args.dbkey)
+warning = "Chrom: '%s', start: '%d', end: '%d' is either invalid or not present in build '%s'. " % (chrom, start, end, args.genome)
 warnings.append(warning)
 if not invalid_lines:
 invalid_lines = get_lines(feature)
 first_invalid_line = line_count
 skipped_lines += len(invalid_lines)
 if args.output_format == "fasta":
 l = len(sequence)
 c = 0
 if input_is_gff:
 start, end = gff_util.convert_bed_coords_to_gff([start, end])
-fields = [args.dbkey, str(chrom), str(start), str(end), strand]
+fields = [args.genome, str(chrom), str(start), str(end), strand]
 meta_data = "_".join(fields)
 if name.strip():
 out.write(">%s %s\n" % (meta_data, name))
 else:
 out.write(">%s\n" % meta_data)

Mercurial > repos > greg > extract_genomic_dna

comparison extract_genomic_dna.py @ 1:311febbd33d6 draft