Mercurial > repos > dfornika > artic_align_trim
annotate align_trim.py @ 10:1ae3d853cac8 draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit fc533961a332436c344ecbed6e7b0a17e108ae50-dirty"
author | dfornika |
---|---|
date | Tue, 17 Mar 2020 23:39:02 +0000 |
parents | 26516cf26444 |
children |
rev | line source |
---|---|
0
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
2 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
3 # Written by Nick Loman |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
4 # Originally part of the ZiBRA pipeline (zibraproject.org) |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
5 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
6 # https://github.com/artic-network/fieldbioinformatics |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
7 |
6
26516cf26444
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
0
diff
changeset
|
8 import csv |
0
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
9 import sys |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
10 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
11 from collections import defaultdict |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
12 from copy import copy |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
13 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
14 import pysam |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
15 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def read_bed_file(fn):
    """Parse a primer-scheme BED file into a list of primer records.

    The file is read as tab-separated text.  Every non-empty row produces a
    dict with these keys:

    * ``Primer_ID`` -- column 4 of the row.
    * ``direction`` -- column 6 when the row has six or more columns.  For
      five-column rows it is inferred from the primer name: ``'+'`` if the
      name contains ``LEFT``, ``'-'`` if it contains ``RIGHT``.
    * ``start`` / ``end`` -- integers.  For ``'+'`` rows they are columns 2
      and 3 respectively; for any other direction they are swapped, so
      ``start`` is column 3 and ``end`` is column 2.

    Blank lines are skipped.

    :param fn: path of the BED file to read.
    :returns: list of primer dicts, in file order.
    :raises SystemExit: (exit status 1) after printing "Malformed BED file!"
        to stderr when a row has fewer than five columns, or a five-column
        row names neither a LEFT nor a RIGHT primer.
    :raises ValueError: if column 2 or 3 is not an integer.
    """
    bedfile = []
    with open(fn) as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        for row in reader:
            if not row:
                # csv yields an empty list for a blank line (e.g. a trailing
                # newline at the end of the file); nothing to parse.
                continue

            if len(row) >= 6:
                # new style bed: explicit strand column
                direction = row[5]
            elif len(row) == 5 and 'LEFT' in row[3]:
                # old style without direction: infer it from the primer name
                direction = '+'
            elif len(row) == 5 and 'RIGHT' in row[3]:
                direction = '-'
            else:
                # Too few columns, or no way to work out the direction.
                # Exit non-zero so a wrapping pipeline sees the failure.
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit(1)

            bedrow = {'Primer_ID': row[3], 'direction': direction}
            if direction == '+':
                bedrow['end'] = int(row[2])
                bedrow['start'] = int(row[1])
            else:
                # Coordinates are swapped for non-forward primers.
                bedrow['end'] = int(row[1])
                bedrow['start'] = int(row[2])
            bedfile.append(bedrow)
    return bedfile
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
45 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
46 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def check_still_matching_bases(s):
    """Return True if the alignment has at least one match operation.

    :param s: object exposing ``cigartuples``, an iterable of
        ``(operation, length)`` pairs, or ``None`` / empty when the read has
        no CIGAR.
    :returns: True if any operation code is 0 (the code this file treats as
        a match), otherwise False.  A missing or empty CIGAR gives False.
    """
    cigartuples = s.cigartuples
    if not cigartuples:
        # pysam reports a read without a CIGAR as None; there is nothing
        # that could match, and iterating None would raise TypeError.
        return False
    return any(flag == 0 for flag, _length in cigartuples)
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
52 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def trim(args, cigar, s, start_pos, end):
    """Soft-mask one end of alignment ``s`` up to reference position ``start_pos``.

    CIGAR operations are removed from the chosen end of ``cigar`` until a
    match operation carries the running reference position to or past
    ``start_pos``.  Any overshoot is put back as a match of that length, and
    the removed query bases are replaced by a single soft clip (code 4).

    Operation codes handled: 0 (match) consumes query and reference,
    1 (insertion) and 4 (soft clip) consume query only, 2 (deletion)
    consumes reference only.  Any other code is dropped without being
    counted.

    :param args: object with a boolean ``verbose`` attribute; when true,
        progress messages are written to stderr.
    :param cigar: list of ``(operation, length)`` tuples.  It is modified in
        place and then assigned to ``s.cigartuples``.
    :param s: alignment object.  ``s.pos`` (left end) or ``s.reference_end``
        (right end) is read; ``s.cigartuples`` is written, and ``s.pos`` too
        when trimming the left end.
    :param start_pos: reference coordinate to mask up to.
    :param end: falsy to trim from the start of the alignment, truthy to
        trim from its end.

    If the CIGAR runs out before the boundary is reached, the read is left
    fully soft-clipped with no match operations (previously this raised
    IndexError).  Such a read can be detected with
    ``check_still_matching_bases``.
    """
    pos = s.reference_end if end else s.pos

    eaten = 0           # query bases removed so far
    exhausted = False   # True when every operation was consumed
    while True:
        # chomp stuff off until we reach pos
        try:
            flag, length = cigar.pop() if end else cigar.pop(0)
        except IndexError:
            # The alignment stops short of start_pos: nothing survives.
            exhausted = True
            if args.verbose:
                print("Ran out of cigar during soft masking", file=sys.stderr)
            break

        if args.verbose:
            print("Chomped a %s, %s" % (flag, length), file=sys.stderr)

        if flag == 0:
            # match: consumes query and reference
            eaten += length
            pos += -length if end else length
        elif flag == 1:
            # insertion to the ref: consumes query only
            eaten += length
        elif flag == 2:
            # deletion to the ref: consumes reference only
            pos += -length if end else length
        elif flag == 4:
            # existing soft clip: consumes query only
            eaten += length

        # Only stop on a match, so the kept alignment starts with one.
        if flag == 0 and (pos <= start_pos if end else pos >= start_pos):
            break

    # How far the last chomped match overshot the boundary.  The loop above
    # only breaks on a match unless the CIGAR was exhausted, in which case
    # there is nothing to put back.
    extra = 0 if exhausted else abs(pos - start_pos)
    if args.verbose:
        print("extra %s" % (extra), file=sys.stderr)
    if extra:
        if args.verbose:
            print("Inserted a %s, %s" % (0, extra), file=sys.stderr)

        if end:
            cigar.append((0, extra))
        else:
            cigar.insert(0, (0, extra))
        # NOTE(review): deletions advance pos without adding to eaten, so
        # extra can exceed eaten and leave a non-positive soft-clip length.
        # Behaviour kept as in the original -- confirm against callers.
        eaten -= extra

    if not end:
        # The leftmost aligned base moves to the start of the kept match.
        s.pos = pos - extra

    if args.verbose:
        print("New pos: %s" % (s.pos), file=sys.stderr)

    # Replace everything that was removed with one soft clip.
    if end:
        cigar.append((4, eaten))
    else:
        cigar.insert(0, (4, eaten))
    s.cigartuples = cigar
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
127 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def find_primer(bed, pos, direction):
    """Return the primer on the given strand whose start is nearest to pos.

    bed is an iterable of primer dicts; each one is read for its
    'direction' and 'start' keys.  Only primers whose 'direction' equals
    the direction argument are considered.

    The result is a 3-tuple (absolute distance, signed distance, primer),
    where the signed distance is primer['start'] - pos.  When several
    primers are equally close, the one appearing first in bed wins.

    Raises ValueError (from min) when no primer matches direction.
    """
    from operator import itemgetter

    candidates = []
    for primer in bed:
        if primer['direction'] != direction:
            continue
        offset = primer['start'] - pos
        candidates.append((abs(offset), offset, primer))

    # Compare on the absolute distance only, so the primer dicts themselves
    # are never compared and ties keep their original order.
    return min(candidates, key=itemgetter(0))
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
133 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def is_correctly_paired(p1, p2):
    """Tell whether two primer hits carry the same name apart from the side suffix.

    p1 and p2 are tuples as produced by find_primer: index 2 holds the
    primer dict, which must provide a 'Primer_ID' string.  '_LEFT' is
    removed from the first ID and '_RIGHT' from the second; the hits are
    considered paired when the remaining names are equal.
    """
    left_stem = p1[2]['Primer_ID'].replace('_LEFT', '')
    right_stem = p2[2]['Primer_ID'].replace('_RIGHT', '')
    return left_stem == right_stem
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
142 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def go(args):
    """Trim alignments to primer positions, streaming stdin to stdout.

    Alignments are read with pysam from "-" in mode "rb" and the kept
    records are written to "-" in mode "wh", reusing the input header as
    template.  For every mapped, non-supplementary alignment the nearest
    '+' primer to its reference start and the nearest '-' primer to its
    reference end are looked up, a report line is built, and trim() is
    invoked on whichever end extends past the chosen primer coordinate.

    args attributes read here:
        bedfile   -- path handed to read_bed_file
        report    -- optional path; when set, a tab-separated report with a
                     header row is written there
        verbose   -- when true, report lines and no-trim notices go to stderr
        start     -- when true, trim to the primer 'start' coordinate,
                     otherwise to its 'end' coordinate
        normalise -- optional int; at most this many alignments are written
                     per (left primer, right primer, strand) combination

    Unmapped and supplementary alignments are dropped with a note on
    stderr.  Alignments rejected by check_still_matching_bases are dropped
    silently.
    """
    # The report file is optional; None marks "no report requested" so the
    # cleanup below never touches an unbound name.
    reportfh = open(args.report, "w") if args.report else None
    try:
        if reportfh is not None:
            print("QueryName\tReferenceStart\tReferenceEnd\tPrimerPair\tPrimer1\tPrimer1Start\tPrimer2\tPrimer2Start\tIsSecondary\tIsSupplementary\tStart\tEnd\tCorrectlyPaired", file=reportfh)

        bed = read_bed_file(args.bedfile)

        counter = defaultdict(int)

        # Which primer coordinate to trim to is fixed for the whole run.
        position_key = 'start' if args.start else 'end'

        infile = pysam.AlignmentFile("-", "rb")
        outfile = pysam.AlignmentFile("-", "wh", template=infile)
        for s in infile:
            cigar = copy(s.cigartuples)

            ## logic - if alignment start site is _before_ but within X bases of
            ## a primer site, trim it off

            if s.is_unmapped:
                print("%s skipped as unmapped" % (s.query_name), file=sys.stderr)
                continue

            if s.is_supplementary:
                print("%s skipped as supplementary" % (s.query_name), file=sys.stderr)
                continue

            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            correctly_paired = is_correctly_paired(p1, p2)

            report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%d" % (s.query_name, s.reference_start, s.reference_end, p1[2]['Primer_ID'], p2[2]['Primer_ID'], p1[2]['Primer_ID'], abs(p1[1]), p2[2]['Primer_ID'], abs(p2[1]), s.is_secondary, s.is_supplementary, p1[2]['start'], p2[2]['end'], correctly_paired)
            if reportfh is not None:
                print(report, file=reportfh)

            if args.verbose:
                print(report, file=sys.stderr)

            ## if the alignment starts before the end of the primer, trim to that position

            # Trimming is best-effort: a failure is reported on stderr and
            # the alignment continues through the remaining checks.  Both
            # ends share one try block, so a failure on the left end also
            # skips the right end.
            try:
                primer_position = p1[2][position_key]

                if s.reference_start < primer_position:
                    trim(args, cigar, s, primer_position, 0)
                elif args.verbose:
                    print("ref start %s >= primer_position %s" % (s.reference_start, primer_position), file=sys.stderr)

                primer_position = p2[2][position_key]

                if s.reference_end > primer_position:
                    trim(args, cigar, s, primer_position, 1)
                elif args.verbose:
                    # This branch means reference_end <= primer_position.
                    print("ref end %s <= primer_position %s" % (s.reference_end, primer_position), file=sys.stderr)
            except Exception as e:
                print("problem %s" % (e,), file=sys.stderr)

            if args.normalise:
                pair = "%s-%s-%d" % (p1[2]['Primer_ID'], p2[2]['Primer_ID'], s.is_reverse)
                counter[pair] += 1

                # Cap the number of alignments written per primer pair and strand.
                if counter[pair] > args.normalise:
                    continue

            if not check_still_matching_bases(s):
                continue

            outfile.write(s)
    finally:
        # Close the report even when processing fails part-way through.
        if reportfh is not None:
            reportfh.close()
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
221 |
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
dfornika
parents:
diff
changeset
|
def main():
    """Build the command-line parser, parse the arguments and call go()."""
    from argparse import ArgumentParser

    cli = ArgumentParser(description='Trim alignments from an amplicon scheme.')
    cli.add_argument('bedfile', help='BED file containing the amplicon scheme')
    cli.add_argument('--normalise', type=int, help='Subsample to n coverage per strand')
    cli.add_argument('--report', type=str, help='Output report to file')

    # The remaining options are plain on/off switches.
    switches = (
        ('--start', 'Trim to start of primers instead of ends'),
        ('--verbose', 'Debug mode'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    go(cli.parse_args())


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()