Mercurial > repos > galaxyp > retrieve_ensembl_bed
annotate bedutil.py @ 0:887e111c0919 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
author | galaxyp |
---|---|
date | Sun, 14 Jan 2018 14:11:53 -0500 |
parents | |
children | c3d600729b6f |
rev | line source |
---|---|
0
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
2 """ |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
3 # |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
4 #------------------------------------------------------------------------------ |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
5 # University of Minnesota |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
6 # Copyright 2016, Regents of the University of Minnesota |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
7 #------------------------------------------------------------------------------ |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
8 # Author: |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
9 # |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
10 # James E Johnson |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
11 # |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
12 #------------------------------------------------------------------------------ |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
13 """ |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
14 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
15 import sys |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
16 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
17 from Bio.Seq import reverse_complement, translate |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
18 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
19 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def bed_from_line(line, ensembl=False, seq_column=None):
    """Parse one tab-separated BED line into a BedEntry.

    line: a text line; trailing CR/LF characters are stripped before
        splitting on tabs.
    ensembl: when true and the line has at least 20 columns, columns
        13-20 are copied onto the entry's extended attributes.
    seq_column: optional column index (negative indexes allowed) whose
        value is stored as the entry's ``seq``; silently ignored when it
        falls outside the line's columns.

    Returns None when the line has fewer than 12 columns, otherwise the
    populated BedEntry.
    """
    columns = line.rstrip('\r\n').split('\t')
    n_columns = len(columns)
    if n_columns < 12:
        return None
    entry = BedEntry(chrom=columns[0],
                     chromStart=columns[1],
                     chromEnd=columns[2],
                     name=columns[3],
                     score=columns[4],
                     strand=columns[5],
                     thickStart=columns[6],
                     thickEnd=columns[7],
                     itemRgb=columns[8],
                     blockCount=columns[9],
                     # drop the trailing comma the list columns may carry
                     blockSizes=columns[10].rstrip(','),
                     blockStarts=columns[11].rstrip(','))
    if seq_column is not None and -n_columns <= seq_column < n_columns:
        entry.seq = columns[seq_column]
    if ensembl and n_columns >= 20:
        (entry.second_name,
         entry.cds_start_status,
         entry.cds_end_status,
         frames,
         entry.biotype,
         entry.gene_name,
         entry.second_gene_name,
         entry.gene_type) = columns[12:20]
        entry.exon_frames = frames.rstrip(',')
    return entry
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
46 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
47 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
48 class BedEntry(object): |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def __init__(self, chrom=None, chromStart=None, chromEnd=None,
             name=None, score=None, strand=None,
             thickStart=None, thickEnd=None, itemRgb=None,
             blockCount=None, blockSizes=None, blockStarts=None):
    """Build a BED12 record, coercing the numeric columns to int.

    chromStart, chromEnd and blockCount are passed to int()
    unconditionally, so they must be supplied. score defaults to 0 when
    None; strand is '-' only when its string form starts with '-',
    otherwise '+'. thickStart/thickEnd fall back to chromStart/chromEnd
    when falsy; itemRgb defaults to '100,100,100' when None.

    blockSizes and blockStarts may each be a comma-separated string or a
    list; both forms are stored as a list of ints. Any other value
    (e.g. None) is stored unchanged.

    Raises ValueError/TypeError from int() when a numeric field cannot
    be converted.
    """
    # str on Python 3, (str, unicode) on Python 2. Avoids referencing the
    # bare name `unicode`, which raises NameError on Python 3 as soon as
    # a non-str value (such as a list) reaches the type check.
    text_types = (str, type(u''))

    def _int_list(value):
        # Shared by blockSizes and blockStarts so both are checked
        # against their own value.
        if isinstance(value, text_types):
            return [int(x) for x in value.split(',')]
        if isinstance(value, list):
            return [int(x) for x in value]
        return value

    self.chrom = chrom
    self.chromStart = int(chromStart)
    self.chromEnd = int(chromEnd)
    self.name = name
    self.score = int(score) if score is not None else 0
    self.strand = '-' if str(strand).startswith('-') else '+'
    # Truthiness test: None, '' and integer 0 all select the fallback.
    self.thickStart = int(thickStart) if thickStart else self.chromStart
    self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
    self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
    self.blockCount = int(blockCount)
    self.blockSizes = _int_list(blockSizes)
    self.blockStarts = _int_list(blockStarts)
    # Extended columns; left as None here and filled in by callers.
    self.second_name = None
    self.cds_start_status = None
    self.cds_end_status = None
    self.exon_frames = None
    self.biotype = None
    self.gene_name = None
    self.second_gene_name = None
    self.gene_type = None
    self.seq = None
    self.cdna = None
    self.pep = None
    # T26C
    self.aa_change = []
    # p.Trp26Cys g.<pos><ref>><alt> # g.1304573A>G
    self.variants = []
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
90 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def __str__(self):
    """Format the entry as a tab-separated 12-column BED line.

    The block size and block start lists are written comma-separated;
    no trailing newline is appended.
    """
    sizes_text = ','.join(str(size) for size in self.blockSizes)
    starts_text = ','.join(str(start) for start in self.blockStarts)
    columns = (
        self.chrom, self.chromStart, self.chromEnd,
        self.name, self.score, self.strand,
        self.thickStart, self.thickEnd, str(self.itemRgb),
        self.blockCount, sizes_text, starts_text,
    )
    return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % columns
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
98 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_splice_junctions(self):
    """Return a "chrom:start_end" label for each gap between blocks.

    For every pair of consecutive blocks, start is the end of the
    earlier block and end is the start of the later one, both as
    chromosome coordinates. An entry with fewer than two blocks yields
    an empty list.
    """
    splice_juncs = []
    for i in range(self.blockCount - 1):
        # blockStarts are offsets from chromStart, so the end of block i
        # needs blockStarts[i] added in. Leaving it out is only correct
        # for the first block, whose offset is 0.
        intron_start = (self.chromStart + self.blockStarts[i]
                        + self.blockSizes[i])
        intron_end = self.chromStart + self.blockStarts[i + 1]
        splice_juncs.append(
            "%s:%d_%d" % (self.chrom, intron_start, intron_end))
    return splice_juncs
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
108 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_exon_seqs(self):
    """Slice the stored sequence into one string per block.

    Returns None when no sequence is set (``seq`` is None or empty).
    On the '-' strand the blocks are returned in reverse order, each
    one reverse-complemented; otherwise they are returned in block
    order as sliced.
    """
    if not self.seq:
        return None
    pieces = []
    for idx in range(self.blockCount):
        begin = self.blockStarts[idx]
        pieces.append(self.seq[begin:begin + self.blockSizes[idx]])
    if self.strand != '-':
        return pieces
    # minus strand: last block first, each piece reverse-complemented
    return [reverse_complement(piece) for piece in reversed(pieces)]
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
121 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_spliced_seq(self, strand=None):
    """Return the exon sequences joined into one string.

    Returns None when self.seq is empty or unset.  When ``strand`` is
    given and differs from self.strand, the joined sequence is passed
    through reverse_complement() before being returned.
    """
    if self.seq:
        spliced = ''.join(self.get_exon_seqs())
        if strand and strand != self.strand:
            return reverse_complement(spliced)
        return spliced
    return None
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
129 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_cdna(self):
    """Return the cached cDNA, computing it on first use.

    When self.cdna is empty or unset it is filled from
    get_spliced_seq(); the stored value is then returned.
    """
    if self.cdna:
        return self.cdna
    self.cdna = self.get_spliced_seq()
    return self.cdna
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
134 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_cds(self):
    """Return the part of the cDNA between thickStart and thickEnd.

    The whole cDNA is returned when the thick region covers the entire
    entry (chromStart == thickStart and chromEnd == thickEnd).
    Otherwise both thick boundaries are converted to cDNA offsets with
    cdna_offset_of_pos() and the slice between the smaller and the
    larger offset is returned.

    Returns None when no cDNA is available, when either boundary cannot
    be mapped to an offset, or when the offsets lie outside the cDNA.
    """
    cdna = self.get_cdna()
    if not cdna:
        return None
    if self.chromStart == self.thickStart\
            and self.chromEnd == self.thickEnd:
        return cdna
    pos = [self.cdna_offset_of_pos(self.thickStart),
           self.cdna_offset_of_pos(self.thickEnd)]
    # cdna_offset_of_pos() gives None for a position that falls in no
    # block; ordering None against ints raises TypeError on Python 3,
    # so report "no CDS" explicitly instead.
    if any(p is None for p in pos):
        return None
    # NOTE(review): thickEnd == chromEnd maps to -1 and is rejected by
    # the range check below -- confirm this is intended for entries
    # whose CDS runs to the end of the transcript.
    start, end = min(pos), max(pos)
    if 0 <= start <= end <= len(cdna):
        return cdna[start:end]
    return None
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
146 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def set_cds(self, cdna_start, cdna_end):
    """Set thickStart/thickEnd from a pair of cDNA offsets.

    Both offsets are converted with pos_of_cdna_offet(); when both
    conversions succeed, thickStart becomes the smaller position and
    thickEnd the larger one.

    Returns self when 0 <= cdna_start < cdna_end <= sum(blockSizes)
    (even if the thick bounds were left unchanged), otherwise None.
    """
    cdna_len = sum(self.blockSizes)
    if not 0 <= cdna_start < cdna_end <= cdna_len:
        return None
    cds_pos = [self.pos_of_cdna_offet(cdna_start),
               self.pos_of_cdna_offet(cdna_end)]
    # Test for None explicitly: position 0 is a valid result and must
    # not be mistaken for a failed lookup (all([0, n]) is False).
    # NOTE(review): pos_of_cdna_offet() rejects offset == cdna_len, so
    # cdna_end == cdna_len leaves the thick bounds untouched -- confirm
    # whether an end-exclusive offset should be accepted there.
    if all(p is not None for p in cds_pos):
        self.thickStart = min(cds_pos)
        self.thickEnd = max(cds_pos)
    return self
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
157 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def trim_cds(self, basepairs):
    """Shorten the thick region by ``basepairs`` in cDNA coordinates.

    A positive value is added to the lower cDNA offset; any other value
    is added to the upper offset (so a negative value pulls it in).
    The adjusted offsets are handed to set_cds() and its result is
    returned.  Returns None when the thick region is not a non-empty
    range inside chromStart..chromEnd.
    """
    inside = (self.chromStart <= self.thickStart
              < self.thickEnd <= self.chromEnd)
    if not inside:
        return None
    offsets = [self.cdna_offset_of_pos(self.thickStart),
               self.cdna_offset_of_pos(self.thickEnd)]
    low, high = min(offsets), max(offsets)
    if basepairs > 0:
        low = low + basepairs
    else:
        high = high + basepairs
    return self.set_cds(low, high)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
167 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_cigar(self):
    """Build a CIGAR-style string from the block layout.

    Each block contributes '<size>M'; between consecutive blocks an
    '<gap>N' is inserted, where gap is the distance from the end of the
    previous block to the start of the current one.
    """
    ops = []
    prev = None
    for idx in range(self.blockCount):
        if prev is not None:
            gap = abs(self.blockStarts[idx] - self.blockSizes[prev]
                      - self.blockStarts[prev])
            ops.append('%dN' % gap)
        ops.append('%dM' % self.blockSizes[idx])
        prev = idx
    return ''.join(ops)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
180 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_cigar_md(self):
    """Return a (cigar, md) pair describing the block layout.

    The cigar string has '<size>M' per block with '<gap>N' between
    consecutive blocks; md is the total of all block sizes formatted as
    a decimal string.
    """
    pieces = []
    for cur in range(self.blockCount):
        if cur > 0:
            before = cur - 1
            gap = abs(self.blockStarts[cur] - self.blockSizes[before]
                      - self.blockStarts[before])
            pieces.append('%dN' % gap)
        pieces.append('%dM' % self.blockSizes[cur])
    cigar = ''.join(pieces)
    md = '%d' % sum(self.blockSizes)
    return (cigar, md)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
195 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_translation(self, sequence=None):
    """Translate a nucleotide sequence, ignoring a trailing partial codon.

    ``sequence`` is used when it is non-empty; otherwise the result of
    get_spliced_seq() is translated.  The sequence is cut down to a
    multiple of three characters and passed to translate().

    Returns None when there is no sequence or it is shorter than one
    codon.
    """
    seq = sequence if sequence else self.get_spliced_seq()
    if not seq:
        return None
    # Floor division keeps the length an int; with true division
    # (Python 3) the slice index below would be a float and raise
    # TypeError.
    seqlen = len(seq) // 3 * 3
    if seqlen < 3:
        return None
    return translate(seq[:seqlen])
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
204 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_translations(self):
    """Return the translations of the spliced sequence in three frames.

    get_translation() is called on the spliced sequence starting at
    offsets 0, 1 and 2; empty or None results are left out.  An empty
    list is returned when there is no spliced sequence.
    """
    spliced = self.get_spliced_seq()
    if not spliced:
        return []
    per_frame = (self.get_translation(sequence=spliced[frame:])
                 for frame in range(3))
    return [protein for protein in per_frame if protein]
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
214 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def pos_of_cdna_offet(self, offset):
    """Map a cDNA offset to a chromosome position.

    Blocks are walked in transcript order (last block first when strand
    is '-') while the block sizes are accumulated, until the block
    containing ``offset`` is found.  On the '-' strand the distance into
    the block is counted back from the block end; otherwise it is
    counted forward from the block start.

    Returns None when ``offset`` is None or outside
    0 <= offset < sum(blockSizes).
    """
    if offset is None or not 0 <= offset < sum(self.blockSizes):
        return None
    rev = self.strand == '-'
    # Materialise the indices as a list: on Python 3 range() returns a
    # range object, which has no reverse() method.
    order = list(range(self.blockCount))
    if rev:
        order.reverse()
    nlen = 0  # cDNA length covered by the blocks already passed
    for x in order:
        size = self.blockSizes[x]
        if offset < nlen + size:
            block_start = self.chromStart + self.blockStarts[x]
            if rev:
                return block_start + size - (offset - nlen)
            return block_start + (offset - nlen)
        nlen += size
    return None
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
232 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def cdna_offset_of_pos(self, pos):
    """Map a chromosome position to an offset in the spliced cDNA.

    Blocks are walked in transcript order (last block first when strand
    is '-').  For the block containing ``pos`` the result is the total
    size of the blocks already passed plus the distance into the block:
    ``pos - block start`` normally, ``block end - pos`` on the '-'
    strand.

    Returns -1 when ``pos`` is outside chromStart <= pos < chromEnd, and
    None when it is inside that range but in no block.
    """
    if not self.chromStart <= pos < self.chromEnd:
        return -1
    rev = self.strand == '-'
    # Materialise the indices as a list: on Python 3 range() returns a
    # range object, which has no reverse() method.
    order = list(range(self.blockCount))
    if rev:
        order.reverse()
    nlen = 0  # cDNA length covered by the blocks already passed
    for x in order:
        bStart = self.chromStart + self.blockStarts[x]
        bEnd = bStart + self.blockSizes[x]
        if bStart <= pos < bEnd:
            return nlen + (bEnd - pos if rev else pos - bStart)
        nlen += self.blockSizes[x]
    # pos lies between blocks; callers must be prepared for None.
    return None
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
247 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def apply_variant(self, pos, ref, alt):
    """Apply an equal-length substitution to this entry's sequence in place.

    ``pos`` is converted with ``int()`` and used as a 0-based coordinate:
    the affected index in ``self.seq`` is ``pos - self.chromStart`` and the
    label appended to ``self.variants`` uses ``pos + 1``.

    ``ref`` and ``alt`` are applied exactly as given; no reverse
    complementing is done for '-' strand entries.  ``ref`` is only used for
    its length and for the recorded label, it is not compared with the
    bases currently stored in ``self.seq``.

    On any problem (missing ref/alt, position outside the entry, ref and
    alt of different length, no sequence, or a substitution that would run
    past the end of the sequence) a message is written to stderr and the
    entry is left unchanged.  Always returns None.
    """
    pos = int(pos)
    if not ref or not alt:
        sys.stderr.write("variant requires ref and alt sequences\n")
        return
    if not self.chromStart <= pos <= self.chromEnd:
        sys.stderr.write(
            "variant not in entry %s: %s %d < %d < %d\n"
            % (self.name, self.strand, self.chromStart, pos, self.chromEnd))
        sys.stderr.write("%s\n" % str(self))
        return
    if len(ref) != len(alt):
        sys.stderr.write("variant only works for snp: %s %s\n" % (ref, alt))
        return
    if not self.seq:
        sys.stderr.write("variant entry %s has no seq\n" % self.name)
        return
    bases = list(self.seq)
    offset = pos - self.chromStart
    # The range check above admits pos == chromEnd, and a multi-base
    # substitution may start inside the sequence but end beyond it.  Report
    # that instead of failing with IndexError part-way through.
    if offset + len(ref) > len(bases):
        sys.stderr.write(
            "variant offset %s: %s %d < %d < %d\n"
            % (self.name, self.strand, self.chromStart,
               pos + 1, self.chromEnd))
        sys.stderr.write("%s\n" % str(self))
        return
    # ref and alt have the same length, so this replaces base for base.
    bases[offset:offset + len(alt)] = list(alt)
    self.seq = ''.join(bases)
    self.variants.append("g.%d%s>%s" % (pos + 1, ref, alt))
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
284 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_variant_bed(self, pos, ref, alt):
    """Return a new BedEntry carrying this entry's sequence with a variant.

    ``pos`` is converted with ``int()``; ``pos - self.chromStart`` is the
    index in ``self.seq`` where ``len(ref)`` bases are replaced by ``alt``.
    Unlike ``apply_variant`` the two alleles may differ in length, and this
    entry's own ``seq`` is not modified.

    The new entry copies chrom, chromStart, chromEnd, name, score, strand,
    itemRgb and the block layout; its thickStart/thickEnd are set to this
    entry's chromStart/chromEnd.  For a length-changing variant, the size of
    a block is adjusted by ``len(alt) - len(ref)`` when the replaced span
    starts in that block and ends strictly before the block's end.  Block
    starts and chromEnd are not shifted.

    Returns None (after writing a message to stderr) when ref or alt is
    empty, when pos is outside chromStart..chromEnd, or when there is no
    sequence.
    """
    pos = int(pos)
    if not ref or not alt:
        sys.stderr.write("variant requires ref and alt sequences\n")
        return None
    if not self.chromStart <= pos <= self.chromEnd:
        sys.stderr.write(
            "variant not in entry %s: %s %d < %d < %d\n"
            % (self.name, self.strand, self.chromStart, pos, self.chromEnd))
        sys.stderr.write("%s\n" % str(self))
        return None
    if not self.seq:
        sys.stderr.write("variant entry %s has no seq\n" % self.name)
        return None
    tbed = BedEntry(chrom=self.chrom,
                    chromStart=self.chromStart, chromEnd=self.chromEnd,
                    name=self.name, score=self.score, strand=self.strand,
                    thickStart=self.chromStart, thickEnd=self.chromEnd,
                    itemRgb=self.itemRgb,
                    blockCount=self.blockCount,
                    blockSizes=self.blockSizes,
                    blockStarts=self.blockStarts)
    bases = list(self.seq)
    offset = pos - self.chromStart
    tbed.seq = ''.join(bases[:offset] + list(alt)
                       + bases[offset + len(ref):])
    if len(ref) != len(alt):
        diff = len(alt) - len(ref)
        rEnd = pos + len(ref)
        # Work on a private copy: the constructor was handed this entry's
        # own blockSizes object, so resizing a block in place could also
        # change the entry the variant was derived from.
        tbed.blockSizes = list(tbed.blockSizes)
        for x in range(tbed.blockCount):
            bStart = tbed.chromStart + tbed.blockStarts[x]
            bEnd = bStart + tbed.blockSizes[x]
            # Example with seq GGGcatGGG:
            #   ref c   alt tag -> GGGtagatGGG  (block grows by 2)
            #   ref cat alt a   -> GGGaGGG      (block shrinks by 2)
            if bStart <= pos < rEnd < bEnd:
                tbed.blockSizes[x] += diff
    return tbed
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
327 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
328 # (start, end) |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_subrange(self, tstart, tstop, debug=False):
    """Map a range given in block-concatenated offsets to chromosome coordinates.

    Blocks are walked in index order, or in reverse index order when the
    strand is '-', while a running offset counts the block bases already
    passed.  ``tstart`` and ``tstop`` are offsets into that concatenation.

    For strand '+', the block containing ``tstart`` sets the returned start
    and the block containing ``tstop`` sets the returned end.  For any other
    strand value the roles are swapped and the position is measured back
    from the end of the block.  An offset that falls in no block leaves the
    corresponding bound at this entry's chromStart / chromEnd.

    With ``debug`` true, the entry and one line per block are written to
    stderr.

    Returns a ``(chromStart, chromEnd)`` tuple.
    """
    chromStart = self.chromStart
    chromEnd = self.chromEnd
    forward = self.strand == '+'
    if debug:
        sys.stderr.write("%s\n" % (str(self)))
    order = range(self.blockCount)
    if self.strand == '-':
        # reversed() works whether range() returns a list or a lazy range,
        # unlike calling .reverse() on the result.
        order = reversed(order)
    bStart = 0
    bEnd = 0
    for x in order:
        size = self.blockSizes[x]
        bEnd = bStart + size
        blockChromStart = self.chromStart + self.blockStarts[x]
        if bStart <= tstart < bEnd:
            if forward:
                chromStart = blockChromStart + (tstart - bStart)
            else:
                chromEnd = blockChromStart + size - (tstart - bStart)
        if bStart <= tstop < bEnd:
            if forward:
                chromEnd = blockChromStart + (tstop - bStart)
            else:
                chromStart = blockChromStart + size - (tstop - bStart)
        if debug:
            sys.stderr.write(
                "%3d %s\t%d\t%d\t%d\t%d\t%d\t%d\n"
                % (x, self.strand, bStart, bEnd,
                   tstart, tstop, chromStart, chromEnd))
        bStart += size
    return (chromStart, chromEnd)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
362 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
363 # get the blocks for sub range |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_blocks(self, chromStart, chromEnd):
    """Clip this entry's blocks to the range chromStart..chromEnd.

    Each block's absolute start is self.chromStart + blockStarts[x] and
    its end is that start plus blockSizes[x].  Blocks are visited in
    stored order and the scan stops at the first block that starts
    beyond chromEnd, so blocks are presumably sorted by start
    (NOTE(review): confirm against the code that fills blockStarts).

    Returns a tuple (count, sizes, starts) where starts are relative to
    the requested chromStart.  A block that merely touches the range
    (its end equals chromStart, or its start equals chromEnd) is kept
    and reported with size 0.
    """
    clipped_sizes = []
    clipped_starts = []
    entry_origin = self.chromStart
    for idx in range(self.blockCount):
        block_lo = entry_origin + self.blockStarts[idx]
        block_hi = block_lo + self.blockSizes[idx]
        if block_lo > chromEnd:
            # this block begins past the requested range: stop scanning
            break
        if block_hi >= chromStart:
            clip_lo = max(chromStart, block_lo)
            clip_hi = min(chromEnd, block_hi)
            clipped_starts.append(clip_lo - chromStart)
            clipped_sizes.append(clip_hi - clip_lo)
    return (len(clipped_sizes), clipped_sizes, clipped_starts)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
380 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def trim(self, tstart, tstop, debug=False):
    """Return a new BedEntry restricted to a sub-range of this entry.

    tstart, tstop and debug are handed unchanged to get_subrange(),
    which supplies the (chromStart, chromEnd) pair of the sub-range;
    get_blocks() then provides the blocks clipped to that pair.  The
    new entry keeps this entry's chrom, name, score, strand and itemRgb,
    and its thickStart/thickEnd are set to the sub-range bounds.  When
    this entry has a seq, the matching slice is copied to the new entry.
    """
    sub_start, sub_end = self.get_subrange(tstart, tstop, debug=debug)
    block_count, block_sizes, block_starts = \
        self.get_blocks(sub_start, sub_end)
    trimmed_entry = BedEntry(
        chrom=self.chrom,
        chromStart=sub_start,
        chromEnd=sub_end,
        name=self.name,
        score=self.score,
        strand=self.strand,
        thickStart=sub_start,
        thickEnd=sub_end,
        itemRgb=self.itemRgb,
        blockCount=block_count,
        blockSizes=block_sizes,
        blockStarts=block_starts)
    if self.seq:
        # seq is indexed relative to this entry's own chromStart
        seq_lo = sub_start - self.chromStart
        seq_hi = sub_end - sub_start + seq_lo
        trimmed_entry.seq = self.seq[seq_lo:seq_hi]
    return trimmed_entry
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
397 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_filtered_translations(self, untrimmed=False, filtering=True,
                              ignore_left_bp=0, ignore_right_bp=0,
                              debug=False):
    """Translate the spliced sequence in three frames and locate each result.

    Parameters:
        untrimmed: when False, each translation is cut down to the
            stretch between the last '*' before the first splice
            junction and the first '*' at or after it.
        filtering: when True, a frame whose trimmed start (in codons)
            is greater than the ignore threshold is skipped.
        ignore_left_bp, ignore_right_bp: threshold in base pairs; the
            left value is used on the '+' strand, the right value
            otherwise.  It is converted to whole codons.
        debug: when True, progress lines are written to sys.stderr.

    Returns a list of three items indexed by frame offset (0, 1, 2).
    Each item is None (no sequence, empty translation, or filtered
    out) or a tuple:
        (chromStart, chromEnd, trimmed, tblockCount, tblockSizes,
         tblockStarts)
    """
    translations = [None, None, None]
    seq = self.get_spliced_seq()
    # Floor division keeps codon counts integral on Python 2 and 3;
    # a float here would make str.find/str.rfind raise TypeError.
    ignore = (ignore_left_bp if self.strand == '+'
              else ignore_right_bp) // 3
    block_sum = sum(self.blockSizes)
    exon_sizes = list(self.blockSizes)
    if self.strand == '-':
        # walk the exons in transcript order
        exon_sizes.reverse()
    splice_sites = [sum(exon_sizes[:x]) // 3
                    for x in range(1, len(exon_sizes))]
    if debug:
        sys.stderr.write("splice_sites: %s" % splice_sites + "\n")
    junc = splice_sites[0] if len(splice_sites) > 0 else exon_sizes[0]
    if not seq:
        return translations
    for i in range(3):
        translation = self.get_translation(sequence=seq[i:])
        if not translation:
            continue
        tstart = 0
        tstop = len(translation)
        # bases left over at the far end once frame i is read in codons
        offset = (block_sum - i) % 3
        if debug:
            sys.stderr.write(
                "frame: %d\ttstart: %d tstop: %d offset: %d\t%s"
                % (i, tstart, tstop, offset, translation) + "\n")
        if not untrimmed:
            tstart = translation.rfind('*', 0, junc) + 1
            stop = translation.find('*', junc)
            tstop = stop if stop >= 0 else len(translation)
        trimmed = translation[tstart:tstop]
        if debug:
            sys.stderr.write(
                "frame: %d\ttstart: %d tstop: %d offset: %d\t%s"
                % (i, tstart, tstop, offset, trimmed) + "\n")
        if filtering and tstart > ignore:
            continue
        # get genomic locations for start and end
        trailing_bp = (len(translation) - tstop) * 3
        leading_bp = i + (tstart * 3)
        if self.strand == '+':
            chromStart = self.chromStart + leading_bp
            chromEnd = self.chromEnd - offset - trailing_bp
        else:
            chromStart = self.chromStart + offset + trailing_bp
            chromEnd = self.chromEnd - leading_bp
        # get the blocks for this translation
        (tblockCount, tblockSizes, tblockStarts) =\
            self.get_blocks(chromStart, chromEnd)
        translations[i] = (chromStart, chromEnd, trimmed,
                           tblockCount, tblockSizes, tblockStarts)
        if debug:
            sys.stderr.write(
                "tblockCount: %d tblockStarts: %s tblockSizes: %s"
                % (tblockCount, tblockStarts, tblockSizes) + "\n")
    return translations
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
456 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_seq_id(self, seqtype='unk:unk', reference='', frame=None):
    """Build an Ensembl-style fasta ID line for this entry.

    Ensembl fasta ID format:
        >ID SEQTYPE:STATUS LOCATION GENE TRANSCRIPT
    for example:
        >ENSP00000328693 pep:splice chromosome:NCBI35:1:904515:910768:1
         gene:ENSG00000158815:transcript:ENST00000328693
         gene_biotype:protein_coding transcript_biotype:protein_coding

    The result is "<name><frame suffix> <seqtype> <location>", where
    location is "chromosome:<reference>:<chrom>:<start>:<end>:<strand>"
    and strand is 1 for '+' and -1 otherwise.  When frame is given, the
    suffix is "_<frame + 1>" and the start/end are pulled inward by the
    frame and by the leftover bases (sum of blockSizes minus frame,
    modulo 3); which end receives which amount depends on the strand.
    """
    forward = self.strand == '+'
    range_lo = self.chromStart
    range_hi = self.chromEnd
    suffix = ''
    if frame is not None:
        leftover = (sum(self.blockSizes) - frame) % 3
        suffix = '_' + str(frame + 1)
        if forward:
            lo_shift, hi_shift = frame, leftover
        else:
            lo_shift, hi_shift = leftover, frame
        range_lo += lo_shift
        range_hi -= hi_shift
    strand_code = 1 if forward else -1
    location = "chromosome:%s:%s:%s:%s:%s"\
        % (reference, self.chrom, range_lo, range_hi, strand_code)
    return "%s%s %s %s" % (self.name, suffix, seqtype, location)
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
481 |
887e111c0919
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 3fd7be931712e7fa5b281bc8c48104c8583ef7f0
galaxyp
parents:
diff
changeset
|
def get_line(self, start_offset=0, end_offset=0):
    """Return this entry as a tab-separated BED line ending in a newline.

    When both offsets are zero (or otherwise falsy) the stored
    ``self.line`` is returned unchanged.  Otherwise the line is rebuilt
    with the region widened by ``start_offset`` positions at the start and
    ``end_offset`` positions at the end:

    * chromStart is lowered and chromEnd is raised;
    * the first block size grows by the start offset and the last block
      size grows by the end offset;
    * the starts of every block after the first are shifted by the start
      offset, so they stay relative to the new chromStart.

    thickStart and thickEnd are written out as stored.

    The adjusted block lists are built on copies, so ``self.blockSizes``
    and ``self.blockStarts`` are never modified and repeated calls with
    the same offsets return the same line.

    :param start_offset: amount to extend before chromStart; it is capped
        at chromStart so the new start never drops below zero.
    :param end_offset: amount to extend after chromEnd.
    :returns: the BED line as a string.
    """
    if not (start_offset or end_offset):
        return self.line
    s_offset = start_offset if start_offset else 0
    e_offset = end_offset if end_offset else 0
    # Cap the start extension so the new chromStart cannot go negative.
    if s_offset > self.chromStart:
        s_offset = self.chromStart
    chrStart = self.chromStart - s_offset
    chrEnd = self.chromEnd + e_offset
    # Copy before adjusting: editing the instance lists in place would
    # compound the offsets on every call.
    blkSizes = list(self.blockSizes)
    blkSizes[0] += s_offset
    blkSizes[-1] += e_offset
    blkStarts = list(self.blockStarts)
    # The first block still begins at the (moved) chromStart; only the
    # later blocks need their relative starts shifted.
    for i in range(1, self.blockCount):
        blkStarts[i] += s_offset
    items = [str(x) for x in [self.chrom, chrStart, chrEnd, self.name,
                              self.score, self.strand, self.thickStart,
                              self.thickEnd, self.itemRgb,
                              self.blockCount,
                              ','.join([str(x) for x in blkSizes]),
                              ','.join([str(x) for x in blkStarts])]]
    return '\t'.join(items) + '\n'