Mercurial > repos > iuc > jbrowse
diff blastxml_to_gapped_gff3.py @ 16:318a0aa5075a draft
Uploaded manually
| author | iuc |
|---|---|
| date | Tue, 29 Dec 2015 15:31:54 -0500 |
| parents | 67fb31daef0e |
| children | 6bfd32bd1000 |
line wrap: on
line diff
```diff
--- a/blastxml_to_gapped_gff3.py Tue Oct 13 17:24:37 2015 -0400
+++ b/blastxml_to_gapped_gff3.py Tue Dec 29 15:31:54 2015 -0500
@@ -28,6 +28,12 @@
     blast_records = NCBIXML.parse(blastxml)
     records = []
     for record in blast_records:
+        # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343
+        match_type = { # Currently we can only handle BLASTN, BLASTP
+            'BLASTN': 'nucleotide_match',
+            'BLASTP': 'protein_match',
+        }.get(record.application, 'match')
+
         rec = SeqRecord(Seq("ACTG"), id=record.query)
         for hit in record.alignments:
             for hsp in hit.hsps:
@@ -67,10 +73,10 @@
                     if parent_match_end > hsp.query_end:
                         parent_match_end = hsp.query_end + 1

-                # The ``protein_match`` feature will hold one or more ``match_part``s
+                # The ``match`` feature will hold one or more ``match_part``s
                 top_feature = SeqFeature(
                     FeatureLocation(parent_match_start, parent_match_end),
-                    type="protein_match", strand=0,
+                    type=match_type, strand=0,
                     qualifiers=qualifiers
                 )

@@ -87,7 +93,7 @@

                     if trim:
                         # If trimming, then we start relative to the
-                        # protein_match's start
+                        # match's start
                         match_part_start = parent_match_start + start
                     else:
                         # Otherwise, we have to account for the subject start's location
@@ -108,6 +114,7 @@
                     )

                 rec.features.append(top_feature)
+        rec.annotations = {}
         records.append(rec)
     return records

@@ -252,5 +259,4 @@
     args = parser.parse_args()

     result = blastxml2gff3(**vars(args))
-
     GFF.write(result, sys.stdout)
```
