Mercurial > repos > iuc > irma
comparison createMissingFiles.py @ 1:736090e99c59 draft
planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
| author | iuc |
|---|---|
| date | Wed, 22 Jan 2025 14:10:33 +0000 |
| parents | 3d86c05cd838 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3d86c05cd838 | 1:736090e99c59 |
|---|---|
| 1 import glob | 1 import glob |
| 2 import os | 2 import os |
| 3 import subprocess | 3 import subprocess |
| 4 | 4 |
| 5 dirPrefix = "resultDir/" | 5 dirPrefix = "resultDir/" |
| 6 expectedSegments = ["A_MP", "A_NP", "A_HA", "A_PB1", | 6 expectedSegments = {"A_MP": 7, "A_NP": 5, "A_HA": 4, "A_PB1": 2, |
| 7 "A_PB2", "A_NA", "A_PA", "A_NS"] | 7 "A_PB2": 1, "A_NA": 6, "A_PA": 3, "A_NS": 8} |
| 8 | 8 |
| 9 | 9 |
| 10 def renameSubtypeFiles(identifier): | 10 def renameSubtypeFiles(identifier): |
| 11 files = glob.glob(dirPrefix + "A_" + identifier + "_*.*") | 11 files = glob.glob(dirPrefix + "A_" + identifier + "_*.*") |
| 12 for file in files: | 12 for file in files: |
| 17 def getMissingSegments(): | 17 def getMissingSegments(): |
| 18 presentSegments = [] | 18 presentSegments = [] |
| 19 for file in os.listdir(dirPrefix): | 19 for file in os.listdir(dirPrefix): |
| 20 if file.endswith(".fasta"): | 20 if file.endswith(".fasta"): |
| 21 presentSegments.append(file.split('.')[0]) | 21 presentSegments.append(file.split('.')[0]) |
| 22 return [segment for segment in expectedSegments | 22 return [segment for segment in expectedSegments.keys() |
| 23 if segment not in presentSegments] | 23 if segment not in presentSegments] |
| 24 | 24 |
| 25 | 25 |
| 26 def getBamHeaderFromAnyFile(): | 26 def getBamHeaderFromAnyFile(): |
| 27 anyBamFile = glob.glob(dirPrefix + "*.bam")[0] | 27 anyBamFile = glob.glob(dirPrefix + "*.bam")[0] |
| 54 def writeEmptyVcf(identifier, vcfHeader): | 54 def writeEmptyVcf(identifier, vcfHeader): |
| 55 with open(dirPrefix + identifier + ".vcf", 'x') as f: | 55 with open(dirPrefix + identifier + ".vcf", 'x') as f: |
| 56 f.write(vcfHeader) | 56 f.write(vcfHeader) |
| 57 | 57 |
| 58 | 58 |
| 59 def writeEmptyAmendedFasta(identifier): | |
| 60 # irma names these files like: resultDir/amended_consensus/resultDir_<segNr>.fa | |
| 61 open(dirPrefix + "amended_consensus/resultDir_" + str(expectedSegments[identifier]) + ".fa", 'x').close() | |
| 62 | |
| 63 | |
| 59 def samtoolsSortAllBam(): | 64 def samtoolsSortAllBam(): |
| 60 for segment in expectedSegments: | 65 for segment in expectedSegments.keys(): |
| 61 os.rename(dirPrefix + segment + ".bam", | 66 os.rename(dirPrefix + segment + ".bam", |
| 62 dirPrefix + segment + "_unsorted.bam") | 67 dirPrefix + segment + "_unsorted.bam") |
| 63 cmd = ['samtools', 'sort', dirPrefix + segment + "_unsorted.bam"] | 68 cmd = ['samtools', 'sort', dirPrefix + segment + "_unsorted.bam"] |
| 64 targetBam = dirPrefix + segment + ".bam" | 69 targetBam = dirPrefix + segment + ".bam" |
| 65 with open(targetBam, "w") as tB: | 70 with open(targetBam, "w") as tB: |
| 74 vcfHeader = getVcfHeaderFromAnyFile() | 79 vcfHeader = getVcfHeaderFromAnyFile() |
| 75 for segment in getMissingSegments(): | 80 for segment in getMissingSegments(): |
| 76 writeEmptyBam(segment, bamHeader) | 81 writeEmptyBam(segment, bamHeader) |
| 77 writeEmptyFasta(segment) | 82 writeEmptyFasta(segment) |
| 78 writeEmptyVcf(segment, vcfHeader) | 83 writeEmptyVcf(segment, vcfHeader) |
| 84 writeEmptyAmendedFasta(segment) | |
| 79 samtoolsSortAllBam() | 85 samtoolsSortAllBam() |
