Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison relabel_fasta.py @ 24:fe354f5dd0ee draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 34034189622f4cf14edd12a4de43739c37b50730
| author | pjbriggs |
|---|---|
| date | Thu, 30 Aug 2018 08:13:55 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 23:545f23776953 | 24:fe354f5dd0ee |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 DESCRIPTION = \ | |
| 4 """Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2, | |
| 5 <PREFIX>3 ... (<PREFIX> is provided by the user via the command | |
| 6 line). | |
| 7 | |
| 8 Can be used to label OTUs as OTU_1, OTU_2 etc. | |
| 9 | |
| 10 This reimplements the functionality of the fasta_number.py utility | |
| 11 from https://drive5.com/python/fasta_number_py.html | |
| 12 """ | |
| 13 | |
| 14 import argparse | |
| 15 | |
def relabel_fasta(fp,prefix,include_size=False):
    """
    Relabel sequence records in a FASTA file

    Each header line (one starting with '>') is replaced with
    '><PREFIX><N>', where N counts the header lines seen so far
    starting from 1. Blank lines are dropped and all other lines
    are passed through with the trailing newline removed.

    Arguments:
      fp (File): file-like object opened for reading
        input FASTA data from
      prefix (str): prefix to use in new labels
      include_size (bool): if True then copy
        'size=...' records into new labels (default
        is not to copy the size)

    Yields: updated lines from the input FASTA (without
      trailing newlines).

    Raises:
      ValueError: if 'include_size' is True and a label has no
        ';'-delimited field starting with 'size='.
    """
    # Running count of sequence records seen so far
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Start of a new sequence record
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix,nlabel)
            continue
        # Extract the first 'size=...' field from the original label.
        # next() on a generator is used rather than indexing the result
        # of filter(), which is not subscriptable on Python 3.
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        # Use the 'prefix' argument (not the script-level 'args') so the
        # function also works when imported and called directly
        yield ">%s%d;%s" % (prefix,nlabel,size)
| 60 | |
if __name__ == "__main__":
    import sys
    # Set up command line parser
    p = argparse.ArgumentParser(description=DESCRIPTION)
    p.add_argument("--needsize",
                   action="store_true",
                   help="include the size as part of the "
                   "output label ('size=...' must be present "
                   "in the input FASTA labels). Output labels "
                   "will be '<PREFIX><NUMBER>;size=<SIZE>'")
    # NB --nosize is accepted but its value is never consulted below:
    # only --needsize controls whether sizes are copied
    p.add_argument("--nosize",
                   action="store_true",
                   help="don't include the size as part of "
                   "the output label (this is the default)")
    p.add_argument("fasta",
                   metavar="FASTA",
                   help="input FASTA file")
    p.add_argument("prefix",
                   metavar="PREFIX",
                   help="prefix to use for labels in output")
    # Process command line
    args = p.parse_args()
    # Python 3 text mode already translates newlines, and the 'U' flag
    # is rejected by Python 3.11+; Python 2 still needs it to get
    # universal newline handling
    if sys.version_info[0] >= 3:
        mode = 'r'
    else:
        mode = 'rU'
    # Relabel FASTA and write the result to stdout
    with open(args.fasta,mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Parenthesised single-argument form works identically as
            # a Python 2 print statement and a Python 3 function call
            print(line)
| 88 |
