diff retrieve_ensembl_bed.py @ 1:c3d600729b6f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
author galaxyp
date Mon, 22 Jan 2018 13:13:26 -0500
parents 887e111c0919
children e385fe93df68
line wrap: on
line diff
--- a/retrieve_ensembl_bed.py	Sun Jan 14 14:11:53 2018 -0500
+++ b/retrieve_ensembl_bed.py	Mon Jan 22 13:13:26 2018 -0500
@@ -12,6 +12,8 @@
 #------------------------------------------------------------------------------
 """
 
+from __future__ import print_function
+
 import argparse
 import re
 import sys
@@ -49,7 +51,6 @@
     parser.add_argument('-v', '--verbose', action='store_true', help='Verbose')
     parser.add_argument('-d', '--debug', action='store_true', help='Debug')
     args = parser.parse_args()
-    # print >> sys.stderr, "args: %s" % args
     species = args.species
     out_wtr = open(args.output, 'w') if args.output != '-' else sys.stdout
     biotypes = ';'.join(['biotype=%s' % bt.strip()
@@ -72,24 +73,24 @@
                             selected_regions[chrom] = []
                         selected_regions[chrom].append([start, end, strand])
         if args.debug:
-            print >> sys.stderr, "selected_regions: %s" % selected_regions
+            print("selected_regions: %s" % selected_regions, file=sys.stderr)
 
     def retrieve_region(species, ref, start, stop, strand):
         transcript_count = 0
-        regions = range(start, stop, max_region)
+        regions = list(range(start, stop, max_region))
         if not regions or regions[-1] < stop:
             regions.append(stop)
         for end in regions[1:]:
             bedlines = get_transcripts_bed(species, ref, start, end,
                                            strand=strand, params=biotypes)
             if args.debug:
-                print >> sys.stderr,\
-                    "%s\t%s\tstart: %d\tend: %d\tcDNA transcripts:%d"\
-                    % (species, ref, start, end, len(bedlines))
+                print("%s\t%s\tstart: %d\tend: %d\tcDNA transcripts:%d" %
+                      (species, ref, start, end, len(bedlines)),
+                      file=sys.stderr)
             # start, end, seq
             for i, bedline in enumerate(bedlines):
                 if args.debug:
-                    print >> sys.stderr, "%s\n" % (bedline)
+                    print("%s\n" % (bedline), file=sys.stderr)
                 if not args.ucsc_chrom_names:
                     bedline = re.sub('^[^\t]+', ref, bedline)
                 try:
@@ -100,8 +101,8 @@
                         out_wtr.write("\n")
                         out_wtr.flush()
                 except Exception as e:
-                    print >> sys.stderr,\
-                        "BED error (%s) : %s\n" % (e, bedline)
+                    print("BED error (%s) : %s\n" % (e, bedline),
+                          file=sys.stderr)
             start = end + 1
         return transcript_count
 
@@ -112,8 +113,8 @@
             length = coord_systems['chromosome'][ref]
             ref_lengths[ref] = length
             if args.toplevel:
-                print >> sys.stderr,\
-                    "%s\t%s\tlength: %d" % (species, ref, length)
+                print("%s\t%s\tlength: %d" % (species, ref, length),
+                      file=sys.stderr)
         if selected_regions:
             transcript_count = 0
             for ref in sorted(selected_regions.keys()):
@@ -129,10 +130,10 @@
                                                             strand)
                         if args.debug or args.verbose:
                             length = stop - start
-                            print >> sys.stderr,\
-                                "%s\t%s:%d-%d%s\tlength: %d\ttrancripts:%d"\
-                                % (species, ref, start, stop, strand,
-                                   length, transcript_count)
+                            print("%s\t%s:%d-%d%s\tlength: %d\ttrancripts:%d" %
+                                  (species, ref, start, stop, strand,
+                                   length, transcript_count),
+                                  file=sys.stderr)
         else:
             strand = ''
             start = 0
@@ -140,15 +141,14 @@
                 length = ref_lengths[ref]
                 transcript_count = 0
                 if args.debug:
-                    print >> sys.stderr,\
-                        "Retrieving transcripts: %s\t%s\tlength: %d"\
-                        % (species, ref, length)
+                    print("Retrieving transcripts: %s\t%s\tlength: %d" %
+                          (species, ref, length), file=sys.stderr)
                 transcript_count += retrieve_region(species, ref, start,
                                                     length, strand)
                 if args.debug or args.verbose:
-                    print >> sys.stderr,\
-                        "%s\t%s\tlength: %d\ttrancripts:%d"\
-                        % (species, ref, length, transcript_count)
+                    print("%s\t%s\tlength: %d\ttrancripts:%d" %
+                          (species, ref, length, transcript_count),
+                          file=sys.stderr)
 
 
 if __name__ == "__main__":