Mercurial > repos > dfornika > artic_align_trim
annotate align_trim.py @ 0:defebd1f95b9 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
author | dfornika |
---|---|
date | Tue, 10 Mar 2020 22:03:49 +0000 |
parents | |
children | 26516cf26444 |
rev | line source |
---|---|
0
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
2 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
3 # Written by Nick Loman |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
4 # Originally part of the ZiBRA pipeline (zibraproject.org) |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
5 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
6 # https://github.com/artic-network/fieldbioinformatics |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
7 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
8 import sys |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
9 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
10 from collections import defaultdict |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
11 from copy import copy |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
12 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
13 import pysam |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
14 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def read_bed_file(fn):
    """Parse a tab-separated primer BED file into a list of primer dicts.

    Columns are read by index: 1 and 2 are the interval coordinates,
    3 is the primer identifier and, when present, 5 is the direction.
    Rows with exactly five columns carry no direction column, so the
    direction is inferred from 'LEFT' / 'RIGHT' in the primer identifier.

    Args:
        fn: Path of the BED file to read.

    Returns:
        A list with one dict per primer, holding the keys 'Primer_ID',
        'direction', 'start' and 'end'.  For '+' primers 'start'/'end' are
        columns 1/2; for every other direction they are swapped (column 2
        becomes 'start', column 1 becomes 'end').

    Raises:
        SystemExit: With exit status 1 if a row has fewer than five columns
            or a five-column row names neither 'LEFT' nor 'RIGHT'.
        ValueError: If a coordinate column is not an integer.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import csv`` being present.
    import csv

    bedfile = []
    # newline='' is what the csv module expects for files it parses.
    with open(fn, newline='') as csvfile:
        for row in csv.reader(csvfile, dialect='excel-tab'):
            if not row:
                # Blank line (typically a trailing newline): nothing to parse.
                continue
            if len(row) < 5:
                # Too few columns to hold coordinates, name and pool.
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit(1)

            primer_id = row[3]
            if len(row) >= 6:
                # new style bed: explicit direction column
                direction = row[5]
            elif 'LEFT' in primer_id:
                # old style without direction: infer it from the primer name
                direction = '+'
            elif 'RIGHT' in primer_id:
                direction = '-'
            else:
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit(1)

            first, second = int(row[1]), int(row[2])
            if direction == '+':
                start, end = first, second
            else:
                # Non-'+' primers are stored with start/end swapped.
                start, end = second, first

            bedfile.append({
                'Primer_ID': primer_id,
                'direction': direction,
                'end': end,
                'start': start,
            })
    return bedfile
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
44 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
45 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def check_still_matching_bases(s):
    """Report whether an alignment still has at least one match operation.

    Args:
        s: An object exposing ``cigartuples`` as a sequence of
            ``(operation, length)`` pairs, or ``None``.

    Returns:
        True if any CIGAR operation code is 0 (match), otherwise False.
        A missing or empty CIGAR yields False.
    """
    cigartuples = s.cigartuples
    if not cigartuples:
        # pysam documents ``cigartuples`` as None when the read has no
        # CIGAR; iterating None would raise TypeError.
        return False
    return any(op == 0 for op, _length in cigartuples)
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
51 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def trim(args, cigar, s, start_pos, end):
    """Soft-clip one end of an alignment up to a reference position.

    CIGAR operations are removed from one end of ``cigar`` until a match
    operation carries the running reference position to or past
    ``start_pos``.  Any overshoot is given back as a match operation, and
    the consumed query bases are re-added as a single soft clip.

    Args:
        args: Options object; only ``args.verbose`` is read (progress is
            printed to stderr when it is truthy).
        cigar: List of ``(operation, length)`` tuples.  It is modified in
            place and finally assigned to ``s.cigartuples``.
        s: Alignment record.  ``s.pos`` (left trim) or ``s.reference_end``
            (right trim) is read; ``s.pos`` is rewritten for a left trim;
            ``s.cigartuples`` is always rewritten.
        start_pos: Reference position to trim to.
        end: Falsy to trim from the left (start) of the alignment, truthy
            to trim from the right (end).

    Raises:
        IndexError: If ``cigar`` is exhausted before a match operation
            reaches ``start_pos``.  ``cigar`` has been emptied by then, but
            ``s`` has not been modified.
    """
    # Operation codes handled here: match, insertion, deletion, soft clip.
    op_match, op_ins, op_del, op_soft = 0, 1, 2, 4

    from_right = bool(end)
    pos = s.reference_end if from_right else s.pos
    # The reference position moves left when trimming the right-hand end.
    step = -1 if from_right else 1

    eaten = 0  # query bases removed so far; they become the soft clip
    while True:
        # chomp stuff off until we reach start_pos
        flag, length = cigar.pop() if from_right else cigar.pop(0)

        if args.verbose:
            print("Chomped a %s, %s" % (flag, length), file=sys.stderr)

        # Matches, insertions and soft clips add to the query-base count.
        if flag in (op_match, op_ins, op_soft):
            eaten += length
        # Matches and deletions move the reference position.
        if flag in (op_match, op_del):
            pos += step * length

        # Only stop on a match, so the remaining alignment begins with one.
        if flag == op_match:
            reached = pos <= start_pos if from_right else pos >= start_pos
            if reached:
                break

    # The loop can only exit after a match, so any overshoot past start_pos
    # is a run of matched bases that must be handed back to the alignment.
    extra = abs(pos - start_pos)
    if args.verbose:
        print("extra %s" % (extra), file=sys.stderr)
    if extra:
        if args.verbose:
            print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
        if from_right:
            cigar.append((op_match, extra))
        else:
            cigar.insert(0, (op_match, extra))
        eaten -= extra

    if not from_right:
        # A left trim moves the alignment start; a right trim does not.
        s.pos = pos - extra

    if args.verbose:
        print("New pos: %s" % (s.pos), file=sys.stderr)

    if from_right:
        cigar.append((op_soft, eaten))
    else:
        cigar.insert(0, (op_soft, eaten))
    s.cigartuples = cigar
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
124 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
125 #print >>sys.stderr, s.query_name, oldcigarstring[0:50], s.cigarstring[0:50] |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
126 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def find_primer(bed, pos, direction):
    """Return the primer of the given direction whose start is nearest to pos.

    Args:
        bed: Iterable of primer records (dicts) that provide at least the
            keys 'start' and 'direction'.
        pos: Reference position to measure from.
        direction: Direction value to select primers by; callers in this
            file pass '+' or '-'.

    Returns:
        A tuple ``(distance, offset, primer)`` where ``offset`` is
        ``primer['start'] - pos``, ``distance`` is ``abs(offset)`` and
        ``primer`` is the matching record. On ties the first matching
        record in ``bed`` wins.

    Raises:
        ValueError: If ``bed`` holds no primer with the requested direction.
    """
    from operator import itemgetter

    candidates = [
        (abs(p['start'] - pos), p['start'] - pos, p)
        for p in bed
        if p['direction'] == direction
    ]
    if not candidates:
        # min() on an empty list fails with an opaque message; say what is
        # actually wrong with the primer scheme instead.
        raise ValueError(
            "no primers with direction %r found in primer scheme" % (direction,)
        )

    # Compare on the absolute distance only so that ties keep input order and
    # the primer dicts themselves are never compared.
    return min(candidates, key=itemgetter(0))
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
132 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def is_correctly_paired(p1, p2):
    """Tell whether two primer hits carry the same amplicon name.

    Args:
        p1: Tuple whose third element is a primer record with a
            'Primer_ID' key; '_LEFT' is stripped from its ID.
        p2: Tuple whose third element is a primer record with a
            'Primer_ID' key; '_RIGHT' is stripped from its ID.

    Returns:
        True when the two stripped IDs are equal, False otherwise.
    """
    left_amplicon = p1[2]['Primer_ID'].replace('_LEFT', '')
    right_amplicon = p2[2]['Primer_ID'].replace('_RIGHT', '')
    return left_amplicon == right_amplicon
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
141 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def go(args):
    """Trim primer sequence from alignments read on stdin and write them to stdout.

    Unmapped and supplementary alignments are skipped. Every other alignment
    is matched to its nearest '+' primer (by reference start) and nearest
    '-' primer (by reference end); ends that extend past the selected primer
    boundary are passed to ``trim``. Alignments failing
    ``check_still_matching_bases`` are dropped, the rest are written out.

    Args:
        args: Parsed command-line namespace providing ``bedfile``,
            ``report`` (path or None), ``normalise`` (int or None),
            ``start`` (bool) and ``verbose`` (bool).
    """
    # Keep the handle as None when no report was requested so the cleanup in
    # ``finally`` never touches an undefined name.
    reportfh = open(args.report, "w") if args.report else None
    try:
        if reportfh is not None:
            print("QueryName\tReferenceStart\tReferenceEnd\tPrimerPair\tPrimer1\tPrimer1Start\tPrimer2\tPrimer2Start\tIsSecondary\tIsSupplementary\tStart\tEnd\tCorrectlyPaired", file=reportfh)

        bed = read_bed_file(args.bedfile)

        # Number of alignments seen so far per (primer pair, strand) key.
        counter = defaultdict(int)

        with pysam.AlignmentFile("-", "rb") as infile, \
                pysam.AlignmentFile("-", "wh", template=infile) as outfile:
            for s in infile:
                ## logic - if alignment start site is _before_ but within X bases of
                ## a primer site, trim it off

                if s.is_unmapped:
                    print("%s skipped as unmapped" % (s.query_name), file=sys.stderr)
                    continue

                if s.is_supplementary:
                    print("%s skipped as supplementary" % (s.query_name), file=sys.stderr)
                    continue

                cigar = copy(s.cigartuples)

                p1 = find_primer(bed, s.reference_start, '+')
                p2 = find_primer(bed, s.reference_end, '-')

                # The report line is only needed for --report / --verbose, so
                # skip the pairing check and formatting otherwise.
                if reportfh is not None or args.verbose:
                    correctly_paired = is_correctly_paired(p1, p2)
                    report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%d" % (s.query_name, s.reference_start, s.reference_end, p1[2]['Primer_ID'], p2[2]['Primer_ID'], p1[2]['Primer_ID'], abs(p1[1]), p2[2]['Primer_ID'], abs(p2[1]), s.is_secondary, s.is_supplementary, p1[2]['start'], p2[2]['end'], correctly_paired)
                    if reportfh is not None:
                        print(report, file=reportfh)
                    if args.verbose:
                        print(report, file=sys.stderr)

                ## if the alignment starts before the end of the primer, trim to that position

                try:
                    primer_position = p1[2]['start'] if args.start else p1[2]['end']
                    if s.reference_start < primer_position:
                        trim(args, cigar, s, primer_position, 0)
                    elif args.verbose:
                        print("ref start %s >= primer_position %s" % (s.reference_start, primer_position), file=sys.stderr)

                    primer_position = p2[2]['start'] if args.start else p2[2]['end']
                    if s.reference_end > primer_position:
                        trim(args, cigar, s, primer_position, 1)
                    elif args.verbose:
                        # This branch runs when reference_end is NOT beyond the primer.
                        print("ref end %s <= primer_position %s" % (s.reference_end, primer_position), file=sys.stderr)
                except Exception as e:
                    # Deliberately best-effort: a failed trim is reported and the
                    # alignment still goes through the checks below.
                    print("problem %s" % (e,), file=sys.stderr)

                if args.normalise:
                    pair = "%s-%s-%d" % (p1[2]['Primer_ID'], p2[2]['Primer_ID'], s.is_reverse)
                    counter[pair] += 1

                    # Drop alignments beyond the requested count for this
                    # primer pair and strand.
                    if counter[pair] > args.normalise:
                        continue

                if not check_still_matching_bases(s):
                    continue

                outfile.write(s)
    finally:
        if reportfh is not None:
            reportfh.close()
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
220 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def main():
    """Parse the command line and hand the resulting namespace to ``go``."""
    import argparse

    cli = argparse.ArgumentParser(description='Trim alignments from an amplicon scheme.')
    cli.add_argument('bedfile', help='BED file containing the amplicon scheme')
    cli.add_argument('--normalise', type=int, help='Subsample to n coverage per strand')
    cli.add_argument('--report', type=str, help='Output report to file')

    # Plain on/off switches, registered in the same order as before.
    switches = (
        ('--start', 'Trim to start of primers instead of ends'),
        ('--verbose', 'Debug mode'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    go(cli.parse_args())
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
233 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
234 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()