Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison relabel_fasta.py @ 5:bbfc9638ba84 draft
First version with (partial) bioconda deps.
| author | pjbriggs |
|---|---|
| date | Wed, 13 Jun 2018 08:39:26 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:013bf1e2cc8f | 5:bbfc9638ba84 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2, | |
| 5 <PREFIX>3 etc (where <PREFIX> is a user-provided argument). | |
| 6 | |
| 7 Can be used to label OTUs as OTU_1, OTU_2 etc. | |
| 8 | |
| 9 This is a reimplementation of the fasta_number.py script from | |
| 10 https://drive5.com/python/fasta_number_py.html | |
| 11 """ | |
| 12 | |
| 13 import argparse | |
| 14 | |
def relabel_fasta(fp,prefix,include_size=False):
    """
    Relabel the sequences in a FASTA file

    Each header line (i.e. a line starting with '>') is replaced
    with '><PREFIX><N>', where <N> counts the header lines seen
    so far, starting from 1. Blank lines are dropped and all
    other lines are passed through with the trailing newline
    removed.

    Arguments:
      fp: iterable yielding the lines of the FASTA file (for
        example a file object opened for reading)
      prefix: string to put in front of the sequence number in
        each new label
      include_size: if True then the first ';'-separated field
        starting with 'size=' is copied from the original label
        and appended to the new one as ';size=...'

    Yields:
      Output lines, without trailing newlines.

    Raises:
      ValueError: if 'include_size' is True and a label has no
        'size=' field.
    """
    # Iterate over lines in file
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Not a header: echo the line to output
            yield line
            continue
        # Header line: assign the next sequence number
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix,nlabel)
            continue
        # Extract size from the label; use next() on a generator
        # rather than indexing the result of filter(), which is
        # not subscriptable on Python 3
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        yield ">%s%d;%s" % (prefix,nlabel,size)
| 48 | |
if __name__ == "__main__":

    # Set up command line parser
    p = argparse.ArgumentParser()
    # --needsize: carry the 'size=...' field from each original
    # label over to the new label
    p.add_argument("--needsize",action="store_true")
    # --nosize: accepted but not used anywhere in this script
    # NOTE(review): presumably kept for command line compatibility
    # with the original fasta_number.py -- confirm
    p.add_argument("--nosize",action="store_true")
    p.add_argument("fasta")
    p.add_argument("prefix")

    # Process command line
    args = p.parse_args()

    # Relabel FASTA
    # Plain text mode is used because the 'U' flag was removed in
    # Python 3.11 (Python 3 text mode already translates newlines)
    with open(args.fasta,'r') as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Function-call form of print works on Python 2 and 3
            print(line)
| 67 | |
| 68 |
