Mercurial > repos > jjohnson > metaphlan
comparison metaphlan_to_phyloxml.py @ 0:0ec6c5781381 draft
Uploaded
| author | jjohnson |
|---|---|
| date | Tue, 09 Oct 2012 12:13:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0ec6c5781381 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Read metaphaln output summarizing taxonomic distribution and format in PhyloXML format | |
| 5 | |
| 6 usage: %prog metaphlan.txt phylo.xml | |
| 7 """ | |
| 8 | |
| 9 import sys | |
| 10 | |
| 11 # Metaphlan output looks like: | |
| 12 # k__Bacteria 99.07618 | |
| 13 # k__Archaea 0.92382 | |
| 14 # k__Bacteria|p__Proteobacteria 82.50732 | |
| 15 # k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 81.64905 | |
| 16 | |
| 17 rank_map = { 'k__': 'kingdom', 'p__': 'phylum', 'c__': 'class', 'o__': 'order', 'f__': 'family', 'g__': 'genus', 's__': 'species' } | |
| 18 | |
| 19 class Node( object ): | |
| 20 """Node in a taxonomy""" | |
| 21 def __init__( self, rank=None, name=None ): | |
| 22 self.rank = rank | |
| 23 self.name = name | |
| 24 self.value = None | |
| 25 self.children = dict() | |
| 26 @staticmethod | |
| 27 def from_metaphlan_file( file ): | |
| 28 """ | |
| 29 Build tree from metaphlan output | |
| 30 """ | |
| 31 root = Node() | |
| 32 for line in file: | |
| 33 taxa, abundance = line.split() | |
| 34 parts = taxa.split( "|" ) | |
| 35 root.add( parts, abundance ) | |
| 36 return root | |
| 37 def add( self, parts, value ): | |
| 38 """ | |
| 39 Parts is a list of node names, recursively add nodes until we reach | |
| 40 the last part, and then attach the value to that node. | |
| 41 """ | |
| 42 if len( parts ) == 0: | |
| 43 self.value = value | |
| 44 else: | |
| 45 next_part = parts.pop(0) | |
| 46 rank = rank_map[ next_part[:3] ] | |
| 47 name = next_part[3:] | |
| 48 if name not in self.children: | |
| 49 self.children[name] = Node( rank, name ) | |
| 50 self.children[name].add( parts, value ) | |
| 51 def __str__( self ): | |
| 52 if self.children: | |
| 53 return "(" + ",".join( str( child ) for child in self.children.itervalues() ) + "):" + self.name | |
| 54 else: | |
| 55 return self.name | |
| 56 def to_phyloxml( self, out ): | |
| 57 print >>out, "<clade>" | |
| 58 if self.name: | |
| 59 print >>out, "<name>%s</name>" % self.name | |
| 60 print >>out, "<taxonomy><scientific_name>%s</scientific_name><rank>%s</rank></taxonomy>" % ( self.name, self.rank ) | |
| 61 if self.value: | |
| 62 print >>out, "<property datatype='xsd:float' ref='metaphlan:abundance' applies_to='node'>%s</property>" % self.value | |
| 63 ## print >>out, "<confidence type='abundance'>%s</confidence>" % self.value | |
| 64 for child in self.children.itervalues(): | |
| 65 child.to_phyloxml( out ) | |
| 66 print >>out, "</clade>" | |
| 67 | |
| 68 out = open( sys.argv[2], 'w' ) | |
| 69 | |
| 70 print >>out, '<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">' | |
| 71 print >>out, '<phylogeny rooted="true">' | |
| 72 | |
| 73 Node.from_metaphlan_file( open( sys.argv[1] ) ).to_phyloxml( out ) | |
| 74 | |
| 75 print >>out, '</phylogeny>' | |
| 76 print >>out, '</phyloxml>' |
