comparison createMissingFiles.py @ 0:3d86c05cd838 draft

planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
author iuc
date Sat, 09 Nov 2024 13:53:38 +0000
parents
children 736090e99c59
comparison
equal deleted inserted replaced
-1:000000000000 0:3d86c05cd838
1 import glob
2 import os
3 import subprocess
4
5 dirPrefix = "resultDir/"
6 expectedSegments = ["A_MP", "A_NP", "A_HA", "A_PB1",
7 "A_PB2", "A_NA", "A_PA", "A_NS"]
8
9
10 def renameSubtypeFiles(identifier):
11 files = glob.glob(dirPrefix + "A_" + identifier + "_*.*")
12 for file in files:
13 ext = file.split('.')[-1]
14 os.rename(file, dirPrefix + "A_" + identifier + "." + ext)
15
16
17 def getMissingSegments():
18 presentSegments = []
19 for file in os.listdir(dirPrefix):
20 if file.endswith(".fasta"):
21 presentSegments.append(file.split('.')[0])
22 return [segment for segment in expectedSegments
23 if segment not in presentSegments]
24
25
26 def getBamHeaderFromAnyFile():
27 anyBamFile = glob.glob(dirPrefix + "*.bam")[0]
28 samtoolsCmd = ["samtools", "view", "-H", anyBamFile]
29 result = subprocess.check_output(samtoolsCmd, text=True)
30 return result.split('\n')[0]
31
32
33 def getVcfHeaderFromAnyFile():
34 with open(glob.glob(dirPrefix + "*.vcf")[0]) as f:
35 anyVersionAndDateLines = f.readline() + f.readline()
36 emptyHeaderLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
37 return anyVersionAndDateLines + emptyHeaderLine
38
39
40 def writeEmptyBam(identifier, bamHeader):
41 with open("headerSamFile.sam", "w") as f:
42 f.write(bamHeader) # write header to a temporary sam file
43 cmd = ['samtools', 'view', '-H', '-b', 'headerSamFile.sam']
44 targetBam = dirPrefix + identifier + ".bam"
45 with open(targetBam, "xb") as tB:
46 subprocess.check_call(cmd, stdout=tB)
47 os.remove("headerSamFile.sam")
48
49
50 def writeEmptyFasta(identifier):
51 open(dirPrefix + identifier + ".fasta", 'x').close()
52
53
54 def writeEmptyVcf(identifier, vcfHeader):
55 with open(dirPrefix + identifier + ".vcf", 'x') as f:
56 f.write(vcfHeader)
57
58
59 def samtoolsSortAllBam():
60 for segment in expectedSegments:
61 os.rename(dirPrefix + segment + ".bam",
62 dirPrefix + segment + "_unsorted.bam")
63 cmd = ['samtools', 'sort', dirPrefix + segment + "_unsorted.bam"]
64 targetBam = dirPrefix + segment + ".bam"
65 with open(targetBam, "w") as tB:
66 subprocess.check_call(cmd, stdout=tB, text=True)
67 os.remove(dirPrefix + segment + "_unsorted.bam")
68
69
70 if __name__ == "__main__":
71 renameSubtypeFiles("HA")
72 renameSubtypeFiles("NA")
73 bamHeader = getBamHeaderFromAnyFile()
74 vcfHeader = getVcfHeaderFromAnyFile()
75 for segment in getMissingSegments():
76 writeEmptyBam(segment, bamHeader)
77 writeEmptyFasta(segment)
78 writeEmptyVcf(segment, vcfHeader)
79 samtoolsSortAllBam()