Mercurial > repos > iuc > irma
annotate createMissingFiles.py @ 3:205a59cb55f1 draft default tip
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 308791d86112ad5ecfca99789841a9520e9bcb34
| author | iuc |
|---|---|
| date | Wed, 29 Jan 2025 08:19:15 +0000 |
| parents | 736090e99c59 |
| children |
| rev | line source |
|---|---|
|
0
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
1 import glob |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
2 import os |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
3 import subprocess |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
4 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
# Directory holding the result files. Paths below are built by plain string
# concatenation, so the trailing slash is part of the value.
dirPrefix = "resultDir/"

# Expected segment file stems, each mapped to the number used in the
# amended-consensus file name
# (resultDir/amended_consensus/resultDir_<segNr>.fa).
expectedSegments = {
    "A_MP": 7,
    "A_NP": 5,
    "A_HA": 4,
    "A_PB1": 2,
    "A_PB2": 1,
    "A_NA": 6,
    "A_PA": 3,
    "A_NS": 8,
}
|
0
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
8 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
9 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def renameSubtypeFiles(identifier):
    """Strip the subtype suffix from the result files of one segment.

    Every path matching ``<dirPrefix>A_<identifier>_*.*`` is renamed to
    ``<dirPrefix>A_<identifier>.<ext>``, where ``<ext>`` is the text after
    the last dot of the matched path.
    """
    targetStem = dirPrefix + "A_" + identifier
    for subtypePath in glob.glob(targetStem + "_*.*"):
        extension = subtypePath.rsplit('.', 1)[-1]
        os.rename(subtypePath, targetStem + "." + extension)
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
15 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
16 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def getMissingSegments():
    """Return the expected segments that have no ``.fasta`` file.

    A segment counts as present when a file in ``dirPrefix`` ends with
    ``.fasta`` and the part of its name before the first dot equals the
    segment key. The result keeps the key order of ``expectedSegments``.
    """
    fastaStems = [name.split('.')[0]
                  for name in os.listdir(dirPrefix)
                  if name.endswith(".fasta")]
    missing = []
    for segment in expectedSegments:
        if segment not in fastaStems:
            missing.append(segment)
    return missing
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
24 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
25 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def getBamHeaderFromAnyFile():
    """Return the first header line of the first ``.bam`` file in dirPrefix.

    The header is read with ``samtools view -H``; only the text before the
    first newline is returned (without the newline itself).

    Raises:
        IndexError: if no ``.bam`` file matches.
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    bamFiles = glob.glob(dirPrefix + "*.bam")
    headerText = subprocess.check_output(
        ["samtools", "view", "-H", bamFiles[0]], text=True)
    firstLine, _, _ = headerText.partition('\n')
    return firstLine
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
31 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
32 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def getVcfHeaderFromAnyFile():
    """Build a minimal VCF header from the first ``.vcf`` file in dirPrefix.

    The first two lines of that file are copied verbatim (presumably the
    file-format and date lines -- confirm against real output) and followed
    by a ``#CHROM ...`` column line. The column line carries no trailing
    newline.

    Raises:
        IndexError: if no ``.vcf`` file matches.
    """
    firstVcf = glob.glob(dirPrefix + "*.vcf")[0]
    with open(firstVcf) as vcf:
        leadingLines = [vcf.readline(), vcf.readline()]
    columnLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
    return "".join(leadingLines) + columnLine
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
38 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
39 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def writeEmptyBam(identifier, bamHeader):
    """Create ``<dirPrefix><identifier>.bam`` containing only a header.

    The header text is written to a temporary SAM file in the current
    working directory, converted with ``samtools view -H -b``, and the
    temporary file is removed afterwards.

    Args:
        identifier: file stem of the BAM file to create.
        bamHeader: header text to place in the new BAM file.

    Raises:
        FileExistsError: if the target BAM file already exists (mode "xb").
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    tmpSam = "headerSamFile.sam"
    targetBam = dirPrefix + identifier + ".bam"
    try:
        with open(tmpSam, "w") as f:
            f.write(bamHeader)  # write header to a temporary sam file
        cmd = ['samtools', 'view', '-H', '-b', tmpSam]
        with open(targetBam, "xb") as tB:
            subprocess.check_call(cmd, stdout=tB)
    finally:
        # Always clean up the temporary file, including when samtools fails
        # or the target already exists, so nothing is left behind.
        if os.path.exists(tmpSam):
            os.remove(tmpSam)
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
48 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
49 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def writeEmptyFasta(identifier):
    """Create an empty ``<dirPrefix><identifier>.fasta`` file.

    Exclusive-creation mode is used, so an existing file raises
    FileExistsError instead of being truncated.
    """
    fastaPath = dirPrefix + identifier + ".fasta"
    with open(fastaPath, 'x'):
        pass
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
52 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
53 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def writeEmptyVcf(identifier, vcfHeader):
    """Create ``<dirPrefix><identifier>.vcf`` holding only ``vcfHeader``.

    Exclusive-creation mode is used, so an existing file raises
    FileExistsError instead of being overwritten.
    """
    vcfPath = dirPrefix + identifier + ".vcf"
    with open(vcfPath, 'x') as vcfFile:
        vcfFile.write(vcfHeader)
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
57 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
58 |
|
1
736090e99c59
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents:
0
diff
changeset
|
def writeEmptyAmendedFasta(identifier):
    """Create an empty amended-consensus file for one segment.

    irma names these files like:
    resultDir/amended_consensus/resultDir_<segNr>.fa, where <segNr> is
    looked up in ``expectedSegments``. An existing file raises
    FileExistsError (exclusive-creation mode).
    """
    segmentNumber = expectedSegments[identifier]
    amendedPath = (dirPrefix + "amended_consensus/resultDir_"
                   + str(segmentNumber) + ".fa")
    with open(amendedPath, 'x'):
        pass
|
736090e99c59
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents:
0
diff
changeset
|
62 |
|
736090e99c59
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents:
0
diff
changeset
|
63 |
|
0
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
def samtoolsSortAllBam():
    """Sort the BAM file of every expected segment in place.

    For each key of ``expectedSegments`` the file
    ``<dirPrefix><segment>.bam`` is moved to ``<segment>_unsorted.bam``,
    sorted with ``samtools sort`` into the original file name, and the
    unsorted copy is deleted.

    Raises:
        FileNotFoundError: if a segment's BAM file does not exist.
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    for segment in expectedSegments:
        targetBam = dirPrefix + segment + ".bam"
        unsortedBam = dirPrefix + segment + "_unsorted.bam"
        os.rename(targetBam, unsortedBam)
        cmd = ['samtools', 'sort', unsortedBam]
        # samtools writes the sorted BAM to stdout here; BAM is binary, so
        # the target is opened in binary mode (as writeEmptyBam does).
        with open(targetBam, "wb") as tB:
            subprocess.check_call(cmd, stdout=tB)
        os.remove(unsortedBam)
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
73 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
74 |
|
3d86c05cd838
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Drop the subtype suffix from the HA and NA files first, so they use
    # the plain segment names that the later steps look for.
    for subtypedSegment in ("HA", "NA"):
        renameSubtypeFiles(subtypedSegment)

    # Take the headers from existing result files before any placeholder
    # file is created.
    bamHeader = getBamHeaderFromAnyFile()
    vcfHeader = getVcfHeaderFromAnyFile()

    # Write placeholder outputs for every segment without a fasta file.
    for segment in getMissingSegments():
        writeEmptyBam(segment, bamHeader)
        writeEmptyFasta(segment)
        writeEmptyVcf(segment, vcfHeader)
        writeEmptyAmendedFasta(segment)

    # Sort all BAM files, including the placeholders just written.
    samtoolsSortAllBam()
