Mercurial > repos > galaxyp > map_peptides_to_bed
changeset 0:c770d523bd28 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 34a8020a7810edc543c6adf6ec58f5437302b703
| author | galaxyp | 
|---|---|
| date | Tue, 12 Jan 2016 20:25:34 -0500 | 
| parents | |
| children | f05f0df71026 | 
| files | map_peptides_to_bed.py map_peptides_to_bed.xml test-data/mapped_peptides.bed test-data/peptides.tsv test-data/translated_bed_sequences.bed | 
| diffstat | 5 files changed, 467 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
#!/usr/bin/env python
# map_peptides_to_bed.py
"""
#
#------------------------------------------------------------------------------
#                         University of Minnesota
#         Copyright 2014, Regents of the University of Minnesota
#------------------------------------------------------------------------------
# Author:
#
#  James E Johnson
#
#------------------------------------------------------------------------------
"""

"""
Input:  tabular file of peptides (a peptide sequence column and, optionally,
        a protein name column and a peptide start-offset column)
        BED file whose 13th column holds the translation of each entry
Output: BED file with one line per mapped peptide; thickStart/thickEnd are
        set to the genomic span of the peptide, and the peptide and the full
        translation are appended as extra columns

NOTE: an earlier version of this header described GFF3 input/output and a
"must cross splice boundary" filter; neither is implemented in this script.
"""

import sys
import re
import os.path
import tempfile
import optparse
from optparse import OptionParser

try:
    from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate
except ImportError:
    # Peptide mapping itself never calls Biopython; only the sequence
    # translation helpers on BedEntry do.  Defer the failure to the first
    # call so the mapping path still works without the package.
    def _biopython_missing(*args, **kwargs):
        raise ImportError("Biopython (Bio.Seq) is required for sequence translation")
    reverse_complement = transcribe = back_transcribe = translate = _biopython_missing


def _warn(message):
    """Write one line of diagnostic output to stderr."""
    sys.stderr.write("%s\n" % message)


def _parse_int_list(text):
    """Parse a comma separated list of integers.

    Empty items are skipped so that the trailing comma UCSC tools emit in
    blockSizes/blockStarts ("35,37,") is accepted.
    """
    return [int(x) for x in text.split(',') if x.strip()]


class BedEntry(object):
    """One line of a 12 column BED file with an optional 13th sequence column.

    Attributes mirror the BED column names (chrom, chromStart, chromEnd, name,
    score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes,
    blockStarts); ``seq`` is the 13th column or None, and ``line`` is the
    unparsed input line.
    """

    def __init__(self, line):
        """Parse ``line``; on a malformed line report to stderr and exit(1)."""
        self.line = line
        try:
            fields = line.rstrip('\r\n').split('\t')
            (chrom, chromStart, chromEnd, name, score, strand, thickStart,
             thickEnd, itemRgb, blockCount, blockSizes, blockStarts) = fields[0:12]
            seq = fields[12] if len(fields) > 12 else None
            self.chrom = chrom
            self.chromStart = int(chromStart)
            self.chromEnd = int(chromEnd)
            self.name = name
            self.score = int(score)
            self.strand = strand
            self.thickStart = int(thickStart)
            self.thickEnd = int(thickEnd)
            self.itemRgb = itemRgb
            self.blockCount = int(blockCount)
            self.blockSizes = _parse_int_list(blockSizes)
            self.blockStarts = _parse_int_list(blockStarts)
            self.seq = seq
        except (ValueError, TypeError, AttributeError) as e:
            # The message needs a placeholder: without one the % operator
            # itself raised TypeError and hid the real parsing error.
            _warn("Unable to read Bed entry: %s" % e)
            sys.exit(1)

    def __str__(self):
        """Return the entry as a tab separated BED line (no trailing newline)."""
        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s%s' % (
            self.chrom, self.chromStart, self.chromEnd, self.name, self.score,
            self.strand, self.thickStart, self.thickEnd, self.itemRgb,
            self.blockCount,
            ','.join([str(x) for x in self.blockSizes]),
            ','.join([str(x) for x in self.blockStarts]),
            '\t%s' % self.seq if self.seq else '')

    def get_splice_junctions(self):
        """Return "chrom:end_start" strings, one per gap between blocks."""
        splice_juncs = []
        for i in range(self.blockCount - 1):
            # The junction starts at the END of block i, so the block's own
            # start offset must be included (it is only 0 for the first block).
            junc_start = self.chromStart + self.blockStarts[i] + self.blockSizes[i]
            junc_end = self.chromStart + self.blockStarts[i + 1]
            splice_juncs.append("%s:%d_%d" % (self.chrom, junc_start, junc_end))
        return splice_juncs

    def get_exon_seqs(self):
        """Return the per-block slices of ``seq`` in transcript order.

        Slices ``self.seq`` by blockStarts/blockSizes; on the '-' strand the
        list is reversed and each slice reverse complemented.  Returns [] when
        the entry has no sequence.
        NOTE(review): presumably expects ``seq`` to be the genomic nucleotide
        sequence spanning chromStart..chromEnd - confirm with the producer.
        """
        if not self.seq:
            return []
        exons = []
        for i in range(self.blockCount):
            start = self.blockStarts[i]
            exons.append(self.seq[start:start + self.blockSizes[i]])
        if self.strand == '-':  # reverse complement
            exons.reverse()
            exons = [reverse_complement(s) for s in exons]
        return exons

    def get_spliced_seq(self):
        """Return the exon sequences joined into one string."""
        return ''.join(self.get_exon_seqs())

    def get_translation(self, sequence=None):
        """Translate ``sequence`` (default: the spliced sequence).

        Trailing bases that do not fill a codon are dropped.  Returns None
        when fewer than three bases are available.
        """
        translation = None
        seq = sequence if sequence else self.get_spliced_seq()
        if seq:
            seqlen = len(seq) // 3 * 3  # floor to a whole number of codons
            if seqlen >= 3:
                translation = translate(seq[:seqlen])
        return translation

    def get_translations(self):
        """Return the non-empty translations of the three forward frames."""
        translations = []
        seq = self.get_spliced_seq()
        if seq:
            for i in range(3):
                translation = self.get_translation(sequence=seq[i:])
                if translation:
                    translations.append(translation)
        return translations

    ## (start,end)
    def get_subrange(self, tstart, tstop):
        """Map a transcript range onto genomic coordinates.

        ``tstart``/``tstop`` are nucleotide offsets into the spliced blocks,
        counted in transcript direction.  Returns (chromStart, chromEnd); an
        offset that falls inside no block leaves the corresponding entry
        boundary unchanged.
        """
        chromStart = self.chromStart
        chromEnd = self.chromEnd
        r = list(range(self.blockCount))
        if self.strand == '-':
            r.reverse()  # walk the blocks in transcript order
        bStart = 0
        for x in r:
            bEnd = bStart + self.blockSizes[x]
            if bStart <= tstart < bEnd:
                if self.strand == '+':
                    chromStart = self.chromStart + self.blockStarts[x] + (tstart - bStart)
                else:
                    chromEnd = self.chromStart + self.blockStarts[x] + self.blockSizes[x] - (tstart - bStart)
            if bStart <= tstop < bEnd:
                if self.strand == '+':
                    chromEnd = self.chromStart + self.blockStarts[x] + (tstop - bStart)
                else:
                    chromStart = self.chromStart + self.blockStarts[x] + self.blockSizes[x] - (tstop - bStart)
            bStart += self.blockSizes[x]
        return (chromStart, chromEnd)

    # get the blocks for sub range
    def get_blocks(self, chromStart, chromEnd):
        """Clip the entry's blocks to the genomic range chromStart..chromEnd.

        Returns (blockCount, blockSizes, blockStarts) with starts relative to
        ``chromStart``.  Also reports the result on stderr.
        NOTE(review): a block that merely touches the range boundary is kept
        with size 0 - confirm whether that is intended.
        """
        tblockCount = 0
        tblockSizes = []
        tblockStarts = []
        for x in range(self.blockCount):
            bStart = self.chromStart + self.blockStarts[x]
            bEnd = bStart + self.blockSizes[x]
            if bStart > chromEnd:
                break
            if bEnd < chromStart:
                continue
            cStart = max(chromStart, bStart)
            tblockStarts.append(cStart - chromStart)
            tblockSizes.append(min(chromEnd, bEnd) - cStart)
            tblockCount += 1
        _warn("tblockCount: %d tblockStarts: %s tblockSizes: %s" % (tblockCount, tblockStarts, tblockSizes))
        return (tblockCount, tblockSizes, tblockStarts)

    ## [[start,end,seq,blockCount,blockSizes,blockStarts],[start,end,seq,blockCount,blockSizes,blockStarts],[start,end,seq,blockCount,blockSizes,blockStarts]]
    ## filter: ignore translation if stop codon in first exon after ignore_left_bp
    def get_filterd_translations(self, untrimmed=False, filtering=True, ignore_left_bp=0, ignore_right_bp=0):
        """Translate the three frames and locate each translation on the genome.

        Returns a six element list; index ``i`` (0-2) holds
        [chromStart, chromEnd, peptide, blockCount, blockSizes, blockStarts]
        for frame ``i`` or None, the remaining slots stay None.  Unless
        ``untrimmed``, each translation is cut at the last stop codon before
        the first splice site and the first stop codon after it.  With
        ``filtering``, a frame whose trimmed start lies beyond the ignore
        window (in codons) is skipped.
        """
        translations = [None, None, None, None, None, None]
        seq = self.get_spliced_seq()
        ignore = (ignore_left_bp if self.strand == '+' else ignore_right_bp) // 3
        block_sum = sum(self.blockSizes)
        # Work on a copy: reversing self.blockSizes in place corrupted the
        # entry for the block computation below and for every later call.
        exon_sizes = list(self.blockSizes)
        if self.strand == '-':
            exon_sizes.reverse()
        splice_sites = [sum(exon_sizes[:x]) // 3 for x in range(1, len(exon_sizes))]
        _warn("splice_sites: %s" % splice_sites)
        junc = splice_sites[0] if len(splice_sites) > 0 else exon_sizes[0]
        if seq:
            for i in range(3):
                translation = self.get_translation(sequence=seq[i:])
                if not translation:
                    continue
                tstart = 0
                tstop = len(translation)
                if not untrimmed:
                    tstart = translation.rfind('*', 0, junc) + 1
                    stop = translation.find('*', junc)
                    tstop = stop if stop >= 0 else len(translation)
                if filtering and tstart > ignore:
                    continue
                trimmed = translation[tstart:tstop]
                # get genomic locations for start and end
                offset = (block_sum - i) % 3
                _warn("tstart: %d tstop: %d offset: %d" % (tstart, tstop, offset))
                if self.strand == '+':
                    chromStart = self.chromStart + i + (tstart * 3)
                    chromEnd = self.chromEnd - offset - (len(translation) - tstop) * 3
                else:
                    chromStart = self.chromStart + offset + (len(translation) - tstop) * 3
                    chromEnd = self.chromEnd - i - (tstart * 3)
                # get the blocks for this translation
                (tblockCount, tblockSizes, tblockStarts) = self.get_blocks(chromStart, chromEnd)
                translations[i] = [chromStart, chromEnd, trimmed, tblockCount, tblockSizes, tblockStarts]
        return translations

    # Correctly spelled alias; the original name is kept for existing callers.
    get_filtered_translations = get_filterd_translations

    def get_seq_id(self, seqtype='unk:unk', reference='', frame=None):
        """Build an Ensembl style fasta ID for the entry (optionally one frame)."""
        ## Ensembl fasta ID format
        # >ID SEQTYPE:STATUS LOCATION GENE TRANSCRIPT
        # >ENSP00000328693 pep:splice chromosome:NCBI35:1:904515:910768:1 gene:ENSG00000158815:transcript:ENST00000328693 gene_biotype:protein_coding transcript_biotype:protein_coding
        frame_name = ''
        chromStart = self.chromStart
        chromEnd = self.chromEnd
        strand = 1 if self.strand == '+' else -1
        if frame is not None:
            block_sum = sum(self.blockSizes)
            offset = (block_sum - frame) % 3
            frame_name = '_' + str(frame + 1)
            if self.strand == '+':
                chromStart += frame
                chromEnd -= offset
            else:
                chromStart += offset
                chromEnd -= frame
        location = "chromosome:%s:%s:%s:%s:%s" % (reference, self.chrom, chromStart, chromEnd, strand)
        seq_id = "%s%s %s %s" % (self.name, frame_name, seqtype, location)
        return seq_id

    def get_line(self, start_offset=0, end_offset=0):
        """Return the BED line, optionally widened on either side.

        Without offsets the original input line is returned unchanged.
        Otherwise the first/last block and the entry bounds are extended by
        ``start_offset``/``end_offset`` bases (the start offset is clamped so
        chromStart never goes negative) and a 12 column line is returned.
        The entry itself is not modified.
        """
        if start_offset or end_offset:
            s_offset = start_offset if start_offset else 0
            e_offset = end_offset if end_offset else 0
            if s_offset > self.chromStart:
                s_offset = self.chromStart
            chrStart = self.chromStart - s_offset
            chrEnd = self.chromEnd + e_offset
            # Copies: editing the instance lists made repeated calls accumulate.
            blkSizes = list(self.blockSizes)
            blkSizes[0] += s_offset
            blkSizes[-1] += e_offset
            blkStarts = list(self.blockStarts)
            for i in range(1, self.blockCount):
                blkStarts[i] += s_offset
            items = [str(x) for x in [self.chrom, chrStart, chrEnd, self.name, self.score, self.strand,
                                      self.thickStart, self.thickEnd, self.itemRgb, self.blockCount,
                                      ','.join([str(x) for x in blkSizes]),
                                      ','.join([str(x) for x in blkStarts])]]
            return '\t'.join(items) + '\n'
        return self.line


def _read_peptides(lines, peptide_column, name_column=None, start_column=None):
    """Collect peptides from tabular ``lines`` (0-based column indexes).

    Returns (prot_peps, unassigned_peps): ``prot_peps[name][peptide]`` is the
    integer start offset of the peptide, or -1 when no usable offset was
    given; ``unassigned_peps`` is the set of peptides without a protein name.
    """
    prot_peps = dict()
    unassigned_peps = set()
    for line in lines:
        if line.startswith('#'):
            continue
        fields = line.rstrip('\r\n').split('\t')
        if peptide_column >= len(fields):
            continue
        peptide = fields[peptide_column]
        if not peptide:
            continue
        prot_name = None
        if name_column is not None and name_column < len(fields):
            prot_name = fields[name_column]
        if not prot_name:
            unassigned_peps.add(peptide)
            continue
        offset = -1
        if start_column is not None and start_column < len(fields):
            try:
                # Must be an int: it is multiplied by 3 to get a base offset.
                offset = int(fields[start_column])
            except ValueError:
                offset = -1  # e.g. a header row; fall back to a sequence search
        prot_peps.setdefault(prot_name, dict())[peptide] = offset
    return (prot_peps, unassigned_peps)


def _map_peptides(bed_lines, prot_peps, bed_fh, gffTags=False, debug=False):
    """Write one BED line to ``bed_fh`` per peptide found in ``bed_lines``.

    Entries are matched to peptides by name.  A peptide without a known
    offset is located by searching the entry's translation; peptides that
    cannot be located are skipped.
    """
    for line in bed_lines:
        if not line.strip() or line.startswith(('track', 'browser', '#')):
            continue
        entry = BedEntry(line)
        if entry.name not in prot_peps:
            continue
        for (peptide, offset) in prot_peps[entry.name].items():
            if offset < 0:
                offset = entry.seq.find(peptide) if entry.seq else -1
            if debug:
                _warn("%s\t%s\t%d\t%s\n" % (entry.name, peptide, offset, entry.seq))
            if offset < 0:
                continue
            tstart = offset * 3
            tstop = tstart + len(peptide) * 3
            if debug:
                _warn("%d\t%d\t%d" % (offset, tstart, tstop))
            (pepStart, pepEnd) = entry.get_subrange(tstart, tstop)
            if debug:
                _warn("%d\t%d\t%d" % (offset, pepStart, pepEnd))
            if bed_fh:
                entry.thickStart = pepStart
                entry.thickEnd = pepEnd
                bedfields = str(entry).split('\t')
                if gffTags:
                    bedfields[3] = "ID=%s;Name=%s" % (entry.name, peptide)
                bed_fh.write("%s\t%s\t%s\n" % ('\t'.join(bedfields[:12]), peptide, entry.seq))


def __main__():
    """Command line entry point: parse options, read peptides, write the BED.

    Exit status: 2 for a missing/unopenable input file, 1 for any error while
    reading the peptides or mapping them onto the BED entries.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-t', '--translated_bed', dest='translated_bed', default=None, help='A bed file with added 13th column having a translation')
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file with peptide_sequence column')
    parser.add_option('-p', '--peptide_column', type='int', dest='peptide_column', default=1, help='column ordinal with peptide sequence')
    parser.add_option('-n', '--name_column', type='int', dest='name_column', default=None, help='column ordinal with protein name')
    parser.add_option('-s', '--start_column', type='int', dest='start_column', default=None, help='column with peptide start position in protein')
    parser.add_option('-B', '--bed', dest='bed', default=None, help='Output a bed file with added 13th column having translation')
    parser.add_option('-T', '--gffTags', dest='gffTags', action='store_true', default=False, help='Add #gffTags to bed output for IGV')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()
    if options.translated_bed is None:
        # Previously this fell through to iterating None and a misleading message.
        parser.error('a translated bed file is required (-t/--translated_bed)')
    # Command line ordinals are 1-based, list indexes 0-based.
    peptide_column = options.peptide_column - 1
    name_column = options.name_column - 1 if options.name_column else None
    start_column = options.start_column - 1 if options.start_column else None

    # Read in peptides
    input_name = options.input if options.input else 'stdin'
    if options.input is not None:
        try:
            inputFile = open(os.path.abspath(options.input), 'r')
        except (IOError, OSError) as e:
            _warn("failed: %s" % e)
            sys.exit(2)
    else:
        inputFile = sys.stdin
    try:
        try:
            (prot_peps, unassigned_peps) = _read_peptides(inputFile, peptide_column, name_column, start_column)
        finally:
            if inputFile is not sys.stdin:
                inputFile.close()
    except Exception as e:
        _warn("failed: Error reading %s - %s" % (input_name, e))
        sys.exit(1)
    if options.debug:
        _warn("prot_peps: %s" % prot_peps)
        _warn("unassigned_peps: %s" % unassigned_peps)

    # Map the peptides onto the translated bed entries
    try:
        inputBed = open(os.path.abspath(options.translated_bed), 'r')
    except (IOError, OSError) as e:
        _warn("failed: %s" % e)
        sys.exit(2)
    bed_fh = None
    try:
        try:
            if options.bed:
                bed_fh = open(options.bed, 'w')
                bed_fh.write('track name="%s" type=bedDetail description="%s" \n' % ('novel_junction_peptides', 'test'))
                if options.gffTags:
                    bed_fh.write('#gffTags\n')
            _map_peptides(inputBed, prot_peps, bed_fh, gffTags=options.gffTags, debug=options.debug)
        finally:
            inputBed.close()
            if bed_fh:
                bed_fh.close()
    except Exception as e:
        # Report the file actually being processed and fail the job instead
        # of exiting 0 with partial output.
        _warn("failed: Error mapping peptides to %s - %s" % (options.translated_bed, e))
        sys.exit(1)


if __name__ == "__main__":
    __main__()
<!-- map_peptides_to_bed.xml: Galaxy tool wrapper for map_peptides_to_bed.py -->
<tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.1.0">
    <requirements>
        <requirement type="package" version="1.62">biopython</requirement>
    </requirements>
    <stdio>
        <!-- any non-zero exit code marks the job as failed -->
        <exit_code range="1:" />
    </stdio>
    <!-- Run the script from the tool directory through the interpreter so it
         does not have to be executable or on the PATH. -->
    <command><![CDATA[
        python '$__tool_directory__/map_peptides_to_bed.py'
          --translated_bed="$translated_bed"
          --input="$input"
          #if $peptide_column:
            --peptide_column=$peptide_column
          #end if
          #if $name_column:
            --name_column=$name_column
          #end if
          #if $start_column:
            --start_column=$start_column
          #end if
          $gffTags
          --bed="$mapped_peptides"
    ]]></command>
    <inputs>
        <param name="translated_bed" type="data" format="bed" label="Translated bed with IDs to match in the input" help=""/>
        <param name="input" type="data" format="tabular" label="Identified Peptides" help=""/>
        <param name="peptide_column" type="data_column" data_ref="input" label="peptide column" optional="true"
               help="Defaults to first column"/>
        <param name="name_column" type="data_column" data_ref="input" label="protein name column" optional="true"
               help="The name in this column must match the name column in the translated bed"/>
        <param name="start_column" type="data_column" data_ref="input" label="peptide offset column" optional="true"
               help="The offset in amino acids of the peptide from the start of the protein sequence"/>
        <param name="gffTags" type="boolean" truevalue="--gffTags" falsevalue="" checked="true" label="Use #gffTags in output" help=""/>
    </inputs>
    <outputs>
        <data name="mapped_peptides" format="bed" />
    </outputs>
    <tests>
        <test>
            <param name="translated_bed" type="data" ftype="bed" value="translated_bed_sequences.bed"/>
            <param name="input" type="data" ftype="tabular" value="peptides.tsv"/>
            <param name="peptide_column" value="2"/>
            <param name="name_column" value="1"/>
            <output name="mapped_peptides" file="mapped_peptides.bed"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

For every peptide in the tabular input whose protein name matches the name
column of an entry in the translated bed file, writes a bed line in which
thickStart and thickEnd are set to the genomic span of the peptide. The peptide
and the full translation are appended as additional columns.

**Command line usage**

::

    Usage: map_peptides_to_bed.py [options]

    Options:
      -h, --help            show this help message and exit
      -t TRANSLATED_BED, --translated_bed=TRANSLATED_BED
                            A bed file with added 13th column having a translation
      -i INPUT, --input=INPUT
                            Tabular file with peptide_sequence column
      -p PEPTIDE_COLUMN, --peptide_column=PEPTIDE_COLUMN
                            column ordinal with peptide sequence
      -n NAME_COLUMN, --name_column=NAME_COLUMN
                            column ordinal with protein name
      -s START_COLUMN, --start_column=START_COLUMN
                            column with peptide start position in protein
      -B BED, --bed=BED     Output a bed file with added 13th column having
                            translation
      -T, --gffTags         Add #gffTags to bed output for IGV
      -d, --debug           Turn on wrapper debugging to stderr

    ]]></help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapped_peptides.bed Tue Jan 12 20:25:34 2016 -0500 @@ -0,0 +1,27 @@ +track name="novel_junction_peptides" type=bedDetail description="test" +#gffTags +15 40902460 40907575 ID=JUNC00019210_2;Name=RNGRNKKLEDNYCEIT 1 + 40902484 40907575 255,0,0 2 35,37 0,5078 RNGRNKKLEDNYCEIT SYENSEKVRNGRNKKLEDNYCEIT +15 40902460 40907575 ID=JUNC00019210_2;Name=SYENSEKVR 1 + 40902460 40902487 255,0,0 2 35,37 0,5078 SYENSEKVR SYENSEKVRNGRNKKLEDNYCEIT +15 40902461 40907549 ID=JUNC00019210_3;Name=KIVRKSEMEGI 1 + 40902467 40907543 255,0,0 2 34,11 0,5077 KIVRKSEMEGI HMKIVRKSEMEGIRN +9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHV 1 - 17421 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHV APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHVMLGAE 1 - 17406 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHVMLGAE APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17406 18053 ID=JUNC00000003_1;Name=APWTSGPCRYKKYVF 1 - 17460 18053 255,0,0 2 73,26 0,621 APWTSGPCRYKKYVF APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17404 18051 ID=JUNC00000003_3;Name=CLPGPPGWCCNKDPCDAGGRD 1 - 17404 17467 255,0,0 2 75,24 0,623 CLPGPPGWCCNKDPCDAGGRD PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD +9 17404 18051 ID=JUNC00000003_3;Name=PLDERALQVQEVCLPG 1 - 17455 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPG PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD +8 27369376 27370079 ID=JUNC00000874_2;Name=PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA 1 + 27369376 27370079 255,0,0 2 51,48 0,655 PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA +8 27369376 27370079 ID=JUNC00000874_2;Name=DMSHGYVTVKGYHKA 1 + 27369397 27370046 255,0,0 2 51,48 0,655 DMSHGYVTVKGYHKA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA +7 148909514 148910831 ID=JUNC00002152_1;Name=DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS 1 + 148909514 148910831 255,0,0 2 60,39 0,1278 DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS +7 148909514 148910831 ID=JUNC00002152_1;Name=DLADRDIPTDP 1 + 148909523 148909556 255,0,0 2 60,39 0,1278 
DLADRDIPTDP DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS +7 148909515 148910811 ID=JUNC00002152_2;Name=IWQTEIFPRI 1 + 148909524 148909554 255,0,0 2 59,19 0,1277 IWQTEIFPRI ISRIWQTEIFPRIPIQVRTRVSHLST +7 148909515 148910811 ID=JUNC00002152_2;Name=IFPRIPIQVRTRVSHL 1 + 148909539 148910805 255,0,0 2 59,19 0,1277 IFPRIPIQVRTRVSHL ISRIWQTEIFPRIPIQVRTRVSHLST +6 41766614 41767580 ID=JUNC00002625_1;Name=LKDSGGLAVIIERRLGSMSSLT 1 - 41766614 41767580 255,0,0 2 53,13 0,953 LKDSGGLAVIIERRLGSMSSLT LKDSGGLAVIIERRLGSMSSLT +6 41766614 41767580 ID=JUNC00002625_1;Name=DSGGLAVIIERR 1 - 41766638 41767574 255,0,0 2 53,13 0,953 DSGGLAVIIERR LKDSGGLAVIIERRLGSMSSLT +6 41766614 41767580 ID=JUNC00002625_1;Name=GLAVIIERRLGSMSS 1 - 41766620 41766665 255,0,0 2 53,13 0,953 GLAVIIERRLGSMSS LKDSGGLAVIIERRLGSMSSLT +6 41766612 41767578 ID=JUNC00002625_3;Name=KRFRWSGRNHREKIGVHVVFDQ 1 - 41766612 41767578 255,0,0 2 55,11 0,955 KRFRWSGRNHREKIGVHVVFDQ KRFRWSGRNHREKIGVHVVFDQ +6 41766612 41767578 ID=JUNC00002625_3;Name=FRWSGR 1 - 41766654 41767572 255,0,0 2 55,11 0,955 FRWSGR KRFRWSGRNHREKIGVHVVFDQ +6 41766612 41767578 ID=JUNC00002625_3;Name=NHREKIGVHVVFD 1 - 41766615 41766654 255,0,0 2 55,11 0,955 NHREKIGVHVVFD KRFRWSGRNHREKIGVHVVFDQ +6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFENSMRRYL 1 - 84856497 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFENSMRRYL LKMKSEAVMNQFENSMRRYL +6 84856497 84862316 ID=JUNC00002772_1;Name=MNQFENSMRRYL 1 - 84856497 84862292 255,0,0 2 7,53 0,5766 MNQFENSMRRYL LKMKSEAVMNQFENSMRRYL +6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFEN 1 - 84862274 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFEN LKMKSEAVMNQFENSMRRYL +6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAV 1 - 84862292 84862316 255,0,0 2 7,53 0,5766 LKMKSEAV LKMKSEAVMNQFENSMRRYL +6 84856497 84862316 ID=JUNC00002772_1;Name=KSEAVMNQFENSMR 1 - 84862265 84862307 255,0,0 2 7,53 0,5766 KSEAVMNQFENSMR LKMKSEAVMNQFENSMRRYL
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/peptides.tsv Tue Jan 12 20:25:34 2016 -0500 @@ -0,0 +1,25 @@ +JUNC00019210_2 RNGRNKKLEDNYCEIT +JUNC00019210_2 SYENSEKVR +JUNC00019210_3 KIVRKSEMEGI +JUNC00000003_1 LDPLAGAVTKTHVMLGAE +JUNC00000003_1 APWTSGPCRYKKYVF +JUNC00000003_1 LDPLAGAVTKTHV +JUNC00000003_3 PLDERALQVQEVCLPG +JUNC00000003_3 CLPGPPGWCCNKDPCDAGGRD +JUNC00000874_2 PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA +JUNC00000874_2 DMSHGYVTVKGYHKA +JUNC00002152_1 DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS +JUNC00002152_1 DLADRDIPTDP +JUNC00002152_2 IWQTEIFPRI +JUNC00002152_2 IFPRIPIQVRTRVSHL +JUNC00002625_1 LKDSGGLAVIIERRLGSMSSLT +JUNC00002625_1 GLAVIIERRLGSMSS +JUNC00002625_1 DSGGLAVIIERR +JUNC00002625_3 FRWSGR +JUNC00002625_3 KRFRWSGRNHREKIGVHVVFDQ +JUNC00002625_3 NHREKIGVHVVFD +JUNC00002772_1 LKMKSEAVMNQFENSMRRYL +JUNC00002772_1 LKMKSEAVMNQFEN +JUNC00002772_1 LKMKSEAV +JUNC00002772_1 MNQFENSMRRYL +JUNC00002772_1 KSEAVMNQFENSMR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/translated_bed_sequences.bed Tue Jan 12 20:25:34 2016 -0500 @@ -0,0 +1,14 @@ +track name="novel_junctioni_translations" description="test" +15 40902460 40907575 JUNC00019210_2 1 + 40902460 40907575 255,0,0 2 35,37 0,5078 SYENSEKVRNGRNKKLEDNYCEIT +15 40902461 40907549 JUNC00019210_3 1 + 40902461 40907549 255,0,0 2 34,11 0,5077 HMKIVRKSEMEGIRN +9 17406 18053 JUNC00000003_1 1 - 17406 18053 255,0,0 2 73,26 0,621 APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17438 18052 JUNC00000003_2 1 - 17438 18052 255,0,0 2 41,25 0,589 PPGRAGPAGTRSMSSWTPWLVL +9 17404 18051 JUNC00000003_3 1 - 17404 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD +8 27369376 27370079 JUNC00000874_2 1 + 27369376 27370079 255,0,0 2 51,48 0,655 PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA +7 99077597 99079392 JUNC00001897_1 1 + 99077597 99079392 255,0,0 2 64,35 0,1760 HLLSLFFQLGVHICIPNFRNNSVSHCTQLRVLL +7 99077598 99079360 JUNC00001897_2 1 + 99077598 99079360 255,0,0 2 63,3 0,1759 IYCLYFSNLVCISVYPTSEITV +7 148909514 148910831 JUNC00002152_1 1 + 148909514 148910831 255,0,0 2 60,39 0,1278 DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS +7 148909515 148910811 JUNC00002152_2 1 + 148909515 148910811 255,0,0 2 59,19 0,1277 ISRIWQTEIFPRIPIQVRTRVSHLST +6 41766614 41767580 JUNC00002625_1 1 - 41766614 41767580 255,0,0 2 53,13 0,953 LKDSGGLAVIIERRLGSMSSLT +6 41766612 41767578 JUNC00002625_3 1 - 41766612 41767578 255,0,0 2 55,11 0,955 KRFRWSGRNHREKIGVHVVFDQ +6 84856497 84862316 JUNC00002772_1 1 - 84856497 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFENSMRRYL
