Mercurial > repos > greg > fasta_extract
comparison fasta_extract.py @ 14:a234ed464674 draft default tip
Uploaded
| author | greg |
|---|---|
| date | Sun, 10 Jan 2016 15:01:05 -0500 |
| parents | a5d7ed2680c3 |
| children |
comparison
equal
deleted
inserted
replaced
| 13:a5d7ed2680c3 | 14:a234ed464674 |
|---|---|
| 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') | 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') |
| 42 args = parser.parse_args() | 42 args = parser.parse_args() |
| 43 | 43 |
| 44 fasta = Fasta(args.genome_file) | 44 fasta = Fasta(args.genome_file) |
| 45 | 45 |
| 46 dh = open('debug.log', 'wb') | |
| 47 for (input_filename, hid) in args.inputs: | 46 for (input_filename, hid) in args.inputs: |
| 48 extend_existing = args.extend_existing == 'existing' | 47 extend_existing = args.extend_existing == 'existing' |
| 49 consider_strand = args.strand == 'yes' | 48 consider_strand = args.strand == 'yes' |
| 50 | 49 |
| 51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') | 50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') |
| 52 fasta_output_path = get_output_path(hid, | 51 fasta_output_path = get_output_path(hid, |
| 53 args.subtract_from_start, | 52 args.subtract_from_start, |
| 54 args.add_to_end, | 53 args.add_to_end, |
| 55 extend_existing, | 54 extend_existing, |
| 56 consider_strand) | 55 consider_strand) |
| 57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path)) | |
| 58 output = open(fasta_output_path, 'wb') | 56 output = open(fasta_output_path, 'wb') |
| 59 gff_output_path = get_output_path(hid, | 57 gff_output_path = get_output_path(hid, |
| 60 args.subtract_from_start, | 58 args.subtract_from_start, |
| 61 args.add_to_end, | 59 args.add_to_end, |
| 62 extend_existing, | 60 extend_existing, |
| 63 consider_strand, | 61 consider_strand, |
| 64 orphan=True) | 62 orphan=True) |
| 65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path)) | |
| 66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') | 63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') |
| 67 | 64 |
| 68 for row in reader: | 65 for row in reader: |
| 69 dh.write('\n row: %s\n' % str(row)) | |
| 70 if len(row) != 9 or row[0].startswith('#'): | 66 if len(row) != 9 or row[0].startswith('#'): |
| 71 continue | 67 continue |
| 72 try: | 68 cname = row[0] |
| 73 cname = row[0] | 69 start = int(row[3]) |
| 74 start = int(row[3]) | 70 end = int(row[4]) |
| 75 end = int(row[4]) | 71 strand = row[6] |
| 76 strand = row[6] | 72 if extend_existing: |
| 77 if extend_existing: | 73 start -= args.subtract_from_start |
| 78 start -= args.subtract_from_start | 74 end += args.add_to_end |
| 79 end += args.add_to_end | 75 else: |
| 80 else: | 76 midpoint = (start + end) // 2 |
| 81 midpoint = (start + end) // 2 | 77 start = midpoint - args.subtract_from_start |
| 82 start = midpoint - args.subtract_from_start | 78 end = midpoint + args.add_to_end |
| 83 end = midpoint + args.add_to_end | 79 if 1 <= start and end <= len(fasta[cname]): |
| 84 if 1 <= start and end <= len(fasta[cname]): | 80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) |
| 85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) | 81 bases = fasta[cname][start-1:end] |
| 86 bases = fasta[cname][start-1:end] | 82 if consider_strand and strand == '-': |
| 87 if consider_strand and strand == '-': | 83 bases = reverse_complement(bases) |
| 88 bases = reverse_complement(bases) | 84 output.write('%s\n' % bases) |
| 89 dh.write('\n bases: %s\n' % str(bases)) | 85 else: |
| 90 output.write('%s\n' % bases) | 86 orphan_writer.writerow(row) |
| 91 else: | |
| 92 orphan_writer.writerow(row) | |
| 93 except Exception, e: | |
| 94 stop_err(str(e)) | |
| 95 output.close() | 87 output.close() |
| 96 dh.close() |
