Mercurial > repos > greg > fasta_extract
comparison fasta_extract.py @ 12:da6ab598f025 draft
Uploaded
| author | greg |
|---|---|
| date | Sun, 10 Jan 2016 14:53:59 -0500 |
| parents | 48f6e9f1c19d |
| children | a5d7ed2680c3 |
comparison
equal
deleted
inserted
replaced
| 11:48f6e9f1c19d | 12:da6ab598f025 |
|---|---|
| 22 format = 'gff' | 22 format = 'gff' |
| 23 output_dir = 'output_orphan_dir' | 23 output_dir = 'output_orphan_dir' |
| 24 else: | 24 else: |
| 25 format = 'fasta' | 25 format = 'fasta' |
| 26 output_dir = 'output_dir' | 26 output_dir = 'output_dir' |
| 27 return os.path.join(output_dir, '%s_on_data_%d.%s' % (attrs, hid, format)) | 27 return os.path.join(output_dir, '%s_on_data_%s.%s' % (attrs, hid, format)) |
| 28 | 28 |
| 29 | 29 |
| 30 def stop_err(msg): | 30 def stop_err(msg): |
| 31 sys.stderr.write(msg) | 31 sys.stderr.write(msg) |
| 32 sys.exit(1) | 32 sys.exit(1) |
| 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') | 41 parser.add_argument('--strand', dest='strand', help='Consider strandedness: reverse complement extracted sequence on reverse strand.') |
| 42 args = parser.parse_args() | 42 args = parser.parse_args() |
| 43 | 43 |
| 44 fasta = Fasta(args.genome_file) | 44 fasta = Fasta(args.genome_file) |
| 45 | 45 |
| | 46 dh = open('debug.log', 'wb') |
| 46 for (input_filename, hid) in args.inputs: | 47 for (input_filename, hid) in args.inputs: |
| 47 hid = int(hid) | |
| 48 extend_existing = args.extend_existing == 'existing' | 48 extend_existing = args.extend_existing == 'existing' |
| 49 consider_strand = args.strand == 'yes' | 49 consider_strand = args.strand == 'yes' |
| | 50 |
| 50 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') | 51 reader = csv.reader(open(input_filename, 'rU'), delimiter='\t') |
| 51 fasta_output_path = get_output_path(hid, | 52 fasta_output_path = get_output_path(hid, |
| 52 args.subtract_from_start, | 53 args.subtract_from_start, |
| 53 args.add_to_end, | 54 args.add_to_end, |
| 54 extend_existing, | 55 extend_existing, |
| 55 consider_strand) | 56 consider_strand) |
| | 57 dh.write('\n fasta_output_path: %s\n' % str(fasta_output_path)) |
| 56 output = open(fasta_output_path, 'wb') | 58 output = open(fasta_output_path, 'wb') |
| 57 gff_output_path = get_output_path(hid, | 59 gff_output_path = get_output_path(hid, |
| 58 args.subtract_from_start, | 60 args.subtract_from_start, |
| 59 args.add_to_end, | 61 args.add_to_end, |
| 60 extend_existing, | 62 extend_existing, |
| 61 consider_strand, | 63 consider_strand, |
| 62 orphan=True) | 64 orphan=True) |
| | 65 dh.write('\n gff_output_path: %s\n' % str(gff_output_path)) |
| 63 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') | 66 orphan_writer = csv.writer(open(gff_output_path, 'wb'), delimiter='\t') |
| | 67 |
| 64 for row in reader: | 68 for row in reader: |
| | 69 dh.write('\n row: %s\n' % str(row)) |
| 65 if len(row) != 9 or row[0].startswith('#'): | 70 if len(row) != 9 or row[0].startswith('#'): |
| 66 continue | 71 continue |
| 67 try: | 72 try: |
| 68 cname = row[0] | 73 cname = row[0] |
| 69 start = int(row[3]) | 74 start = int(row[3]) |
| 79 if 1 <= start and end <= len(fasta[cname]): | 84 if 1 <= start and end <= len(fasta[cname]): |
| 80 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) | 85 output.write('>%s:%s-%s_%s\n' % (cname, start, end, strand)) |
| 81 bases = fasta[cname][start-1:end] | 86 bases = fasta[cname][start-1:end] |
| 82 if consider_strand and strand == '-': | 87 if consider_strand and strand == '-': |
| 83 bases = reverse_complement(bases) | 88 bases = reverse_complement(bases) |
| | 89 dh.write('\n bases: %s\n' % str(bases)) |
| 84 output.write('%s\n' % bases) | 90 output.write('%s\n' % bases) |
| 85 else: | 91 else: |
| 86 orphan_writer.writerow(row) | 92 orphan_writer.writerow(row) |
| 87 except Exception, e: | 93 except Exception, e: |
| 88 stop_err(str(e)) | 94 stop_err(str(e)) |
| 89 finally: | 95 finally: |
| 90 output.close() | 96 output.close() |
| | 97 dh.close() |
