retrieve_ensembl_bed: retrieve_ensembl

comparison retrieve_ensembl_bed.py @ 1:c3d600729b6f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1

author	galaxyp
date	Mon, 22 Jan 2018 13:13:26 -0500
parents	887e111c0919
children	e385fe93df68

comparison

Legend: equal, deleted, inserted, replaced

-:887e111c0919
+:c3d600729b6f
 #
 #  James E Johnson
 #
 #------------------------------------------------------------------------------
 """
+from __future__ import print_function
 import argparse
 import re
 import sys
 'output',
 help='Output BED filepath, or for stdout: "-"')
 parser.add_argument('-v', '--verbose', action='store_true', help='Verbose')
 parser.add_argument('-d', '--debug', action='store_true', help='Debug')
 args = parser.parse_args()
-# print >> sys.stderr, "args: %s" % args
 species = args.species
 out_wtr = open(args.output, 'w') if args.output != '-' else sys.stdout
 biotypes = ';'.join(['biotype=%s' % bt.strip()
 for biotype in args.biotypes
 for bt in biotype.split(',') if bt.strip()])
 if chrom:
 if chrom not in selected_regions:
 selected_regions[chrom] = []
 selected_regions[chrom].append([start, end, strand])
 if args.debug:
-print >> sys.stderr, "selected_regions: %s" % selected_regions
+print("selected_regions: %s" % selected_regions, file=sys.stderr)
 def retrieve_region(species, ref, start, stop, strand):
 transcript_count = 0
-regions = range(start, stop, max_region)
+regions = list(range(start, stop, max_region))
 if not regions or regions[-1] < stop:
 regions.append(stop)
 for end in regions[1:]:
 bedlines = get_transcripts_bed(species, ref, start, end,
 strand=strand, params=biotypes)
 if args.debug:
-print >> sys.stderr,\
+print("%s\t%s\tstart: %d\tend: %d\tcDNA transcripts:%d" %
-"%s\t%s\tstart: %d\tend: %d\tcDNA transcripts:%d"\
+(species, ref, start, end, len(bedlines)),
-% (species, ref, start, end, len(bedlines))
+file=sys.stderr)
 # start, end, seq
 for i, bedline in enumerate(bedlines):
 if args.debug:
-print >> sys.stderr, "%s\n" % (bedline)
+print("%s\n" % (bedline), file=sys.stderr)
 if not args.ucsc_chrom_names:
 bedline = re.sub('^[^\t]+', ref, bedline)
 try:
 if out_wtr:
 out_wtr.write(bedline.replace(',\t', '\t')
 if args.extended_bed
 else str(bed_from_line(bedline)))
 out_wtr.write("\n")
 out_wtr.flush()
 except Exception as e:
-print >> sys.stderr,\
+print("BED error (%s) : %s\n" % (e, bedline),
-"BED error (%s) : %s\n" % (e, bedline)
+file=sys.stderr)
 start = end + 1
 return transcript_count
 coord_systems = get_toplevel(species)
 if 'chromosome' in coord_systems:
 ref_lengths = dict()
 for ref in sorted(coord_systems['chromosome'].keys()):
 length = coord_systems['chromosome'][ref]
 ref_lengths[ref] = length
 if args.toplevel:
-print >> sys.stderr,\
+print("%s\t%s\tlength: %d" % (species, ref, length),
-"%s\t%s\tlength: %d" % (species, ref, length)
+file=sys.stderr)
 if selected_regions:
 transcript_count = 0
 for ref in sorted(selected_regions.keys()):
 if ref in ref_lengths:
 for reg in selected_regions[ref]:
 transcript_count += retrieve_region(species, ref,
 start, stop,
 strand)
 if args.debug or args.verbose:
 length = stop - start
-print >> sys.stderr,\
+print("%s\t%s:%d-%d%s\tlength: %d\ttrancripts:%d" %
-"%s\t%s:%d-%d%s\tlength: %d\ttrancripts:%d"\
+(species, ref, start, stop, strand,
-% (species, ref, start, stop, strand,
+length, transcript_count),
-length, transcript_count)
+file=sys.stderr)
 else:
 strand = ''
 start = 0
 for ref in sorted(ref_lengths.keys()):
 length = ref_lengths[ref]
 transcript_count = 0
 if args.debug:
-print >> sys.stderr,\
+print("Retrieving transcripts: %s\t%s\tlength: %d" %
-"Retrieving transcripts: %s\t%s\tlength: %d"\
+(species, ref, length), file=sys.stderr)
-% (species, ref, length)
 transcript_count += retrieve_region(species, ref, start,
 length, strand)
 if args.debug or args.verbose:
-print >> sys.stderr,\
+print("%s\t%s\tlength: %d\ttrancripts:%d" %
-"%s\t%s\tlength: %d\ttrancripts:%d"\
+(species, ref, length, transcript_count),
-% (species, ref, length, transcript_count)
+file=sys.stderr)
 if __name__ == "__main__":
 __main__()

Mercurial > repos > galaxyp > retrieve_ensembl_bed

comparison retrieve_ensembl_bed.py @ 1:c3d600729b6f draft