diff blastxml_to_gapped_gff3.py @ 16:318a0aa5075a draft

Uploaded manually
author iuc
date Tue, 29 Dec 2015 15:31:54 -0500
parents 67fb31daef0e
children 6bfd32bd1000
line wrap: on
line diff
--- a/blastxml_to_gapped_gff3.py	Tue Oct 13 17:24:37 2015 -0400
+++ b/blastxml_to_gapped_gff3.py	Tue Dec 29 15:31:54 2015 -0500
@@ -28,6 +28,12 @@
     blast_records = NCBIXML.parse(blastxml)
     records = []
     for record in blast_records:
+        # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343
+        match_type = {  # Currently we can only handle BLASTN, BLASTP
+            'BLASTN': 'nucleotide_match',
+            'BLASTP': 'protein_match',
+        }.get(record.application, 'match')
+
         rec = SeqRecord(Seq("ACTG"), id=record.query)
         for hit in record.alignments:
             for hsp in hit.hsps:
@@ -67,10 +73,10 @@
                     if parent_match_end > hsp.query_end:
                         parent_match_end = hsp.query_end + 1
 
-                # The ``protein_match`` feature will hold one or more ``match_part``s
+                # The top-level feature (typed by ``match_type``: ``nucleotide_match``, ``protein_match`` or generic ``match``) will hold one or more ``match_part``s
                 top_feature = SeqFeature(
                     FeatureLocation(parent_match_start, parent_match_end),
-                    type="protein_match", strand=0,
+                    type=match_type, strand=0,
                     qualifiers=qualifiers
                 )
 
@@ -87,7 +93,7 @@
 
                     if trim:
                         # If trimming, then we start relative to the
-                        # protein_match's start
+                        # parent match feature's start
                         match_part_start = parent_match_start + start
                     else:
                         # Otherwise, we have to account for the subject start's location
@@ -108,6 +114,7 @@
                     )
 
                 rec.features.append(top_feature)
+        rec.annotations = {}
         records.append(rec)
     return records
 
@@ -252,5 +259,4 @@
     args = parser.parse_args()
 
     result = blastxml2gff3(**vars(args))
-
     GFF.write(result, sys.stdout)