Mercurial > repos > iuc > irma
comparison createMissingFiles.py @ 0:3d86c05cd838 draft
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
| author | iuc |
|---|---|
| date | Sat, 09 Nov 2024 13:53:38 +0000 |
| parents | |
| children | 736090e99c59 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3d86c05cd838 |
|---|---|
| 1 import glob | |
| 2 import os | |
| 3 import subprocess | |
| 4 | |
| 5 dirPrefix = "resultDir/" | |
| 6 expectedSegments = ["A_MP", "A_NP", "A_HA", "A_PB1", | |
| 7 "A_PB2", "A_NA", "A_PA", "A_NS"] | |
| 8 | |
| 9 | |
| 10 def renameSubtypeFiles(identifier): | |
| 11 files = glob.glob(dirPrefix + "A_" + identifier + "_*.*") | |
| 12 for file in files: | |
| 13 ext = file.split('.')[-1] | |
| 14 os.rename(file, dirPrefix + "A_" + identifier + "." + ext) | |
| 15 | |
| 16 | |
| 17 def getMissingSegments(): | |
| 18 presentSegments = [] | |
| 19 for file in os.listdir(dirPrefix): | |
| 20 if file.endswith(".fasta"): | |
| 21 presentSegments.append(file.split('.')[0]) | |
| 22 return [segment for segment in expectedSegments | |
| 23 if segment not in presentSegments] | |
| 24 | |
| 25 | |
| 26 def getBamHeaderFromAnyFile(): | |
| 27 anyBamFile = glob.glob(dirPrefix + "*.bam")[0] | |
| 28 samtoolsCmd = ["samtools", "view", "-H", anyBamFile] | |
| 29 result = subprocess.check_output(samtoolsCmd, text=True) | |
| 30 return result.split('\n')[0] | |
| 31 | |
| 32 | |
| 33 def getVcfHeaderFromAnyFile(): | |
| 34 with open(glob.glob(dirPrefix + "*.vcf")[0]) as f: | |
| 35 anyVersionAndDateLines = f.readline() + f.readline() | |
| 36 emptyHeaderLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO" | |
| 37 return anyVersionAndDateLines + emptyHeaderLine | |
| 38 | |
| 39 | |
| 40 def writeEmptyBam(identifier, bamHeader): | |
| 41 with open("headerSamFile.sam", "w") as f: | |
| 42 f.write(bamHeader) # write header to a temporary sam file | |
| 43 cmd = ['samtools', 'view', '-H', '-b', 'headerSamFile.sam'] | |
| 44 targetBam = dirPrefix + identifier + ".bam" | |
| 45 with open(targetBam, "xb") as tB: | |
| 46 subprocess.check_call(cmd, stdout=tB) | |
| 47 os.remove("headerSamFile.sam") | |
| 48 | |
| 49 | |
| 50 def writeEmptyFasta(identifier): | |
| 51 open(dirPrefix + identifier + ".fasta", 'x').close() | |
| 52 | |
| 53 | |
| 54 def writeEmptyVcf(identifier, vcfHeader): | |
| 55 with open(dirPrefix + identifier + ".vcf", 'x') as f: | |
| 56 f.write(vcfHeader) | |
| 57 | |
| 58 | |
| 59 def samtoolsSortAllBam(): | |
| 60 for segment in expectedSegments: | |
| 61 os.rename(dirPrefix + segment + ".bam", | |
| 62 dirPrefix + segment + "_unsorted.bam") | |
| 63 cmd = ['samtools', 'sort', dirPrefix + segment + "_unsorted.bam"] | |
| 64 targetBam = dirPrefix + segment + ".bam" | |
| 65 with open(targetBam, "w") as tB: | |
| 66 subprocess.check_call(cmd, stdout=tB, text=True) | |
| 67 os.remove(dirPrefix + segment + "_unsorted.bam") | |
| 68 | |
| 69 | |
| 70 if __name__ == "__main__": | |
| 71 renameSubtypeFiles("HA") | |
| 72 renameSubtypeFiles("NA") | |
| 73 bamHeader = getBamHeaderFromAnyFile() | |
| 74 vcfHeader = getVcfHeaderFromAnyFile() | |
| 75 for segment in getMissingSegments(): | |
| 76 writeEmptyBam(segment, bamHeader) | |
| 77 writeEmptyFasta(segment) | |
| 78 writeEmptyVcf(segment, vcfHeader) | |
| 79 samtoolsSortAllBam() |
