view createMissingFiles.py @ 3:205a59cb55f1 draft default tip

planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 308791d86112ad5ecfca99789841a9520e9bcb34
author iuc
date Wed, 29 Jan 2025 08:19:15 +0000
parents 736090e99c59
children
line wrap: on
line source

import glob
import os
import subprocess

dirPrefix = "resultDir/"
expectedSegments = {"A_MP": 7, "A_NP": 5, "A_HA": 4, "A_PB1": 2,
                    "A_PB2": 1, "A_NA": 6, "A_PA": 3, "A_NS": 8}


def renameSubtypeFiles(identifier):
    files = glob.glob(dirPrefix + "A_" + identifier + "_*.*")
    for file in files:
        ext = file.split('.')[-1]
        os.rename(file, dirPrefix + "A_" + identifier + "." + ext)


def getMissingSegments():
    presentSegments = []
    for file in os.listdir(dirPrefix):
        if file.endswith(".fasta"):
            presentSegments.append(file.split('.')[0])
    return [segment for segment in expectedSegments.keys()
            if segment not in presentSegments]


def getBamHeaderFromAnyFile():
    anyBamFile = glob.glob(dirPrefix + "*.bam")[0]
    samtoolsCmd = ["samtools", "view", "-H", anyBamFile]
    result = subprocess.check_output(samtoolsCmd, text=True)
    return result.split('\n')[0]


def getVcfHeaderFromAnyFile():
    with open(glob.glob(dirPrefix + "*.vcf")[0]) as f:
        anyVersionAndDateLines = f.readline() + f.readline()
        emptyHeaderLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
        return anyVersionAndDateLines + emptyHeaderLine


def writeEmptyBam(identifier, bamHeader):
    with open("headerSamFile.sam", "w") as f:
        f.write(bamHeader)  # write header to a temporary sam file
    cmd = ['samtools', 'view', '-H', '-b', 'headerSamFile.sam']
    targetBam = dirPrefix + identifier + ".bam"
    with open(targetBam, "xb") as tB:
        subprocess.check_call(cmd, stdout=tB)
        os.remove("headerSamFile.sam")


def writeEmptyFasta(identifier):
    open(dirPrefix + identifier + ".fasta", 'x').close()


def writeEmptyVcf(identifier, vcfHeader):
    with open(dirPrefix + identifier + ".vcf", 'x') as f:
        f.write(vcfHeader)


def writeEmptyAmendedFasta(identifier):
    #  irma names these files like: resultDir/amended_consensus/resultDir_<segNr>.fa
    open(dirPrefix + "amended_consensus/resultDir_" + str(expectedSegments[identifier]) + ".fa", 'x').close()


def samtoolsSortAllBam():
    for segment in expectedSegments.keys():
        os.rename(dirPrefix + segment + ".bam",
                  dirPrefix + segment + "_unsorted.bam")
        cmd = ['samtools', 'sort', dirPrefix + segment + "_unsorted.bam"]
        targetBam = dirPrefix + segment + ".bam"
        with open(targetBam, "w") as tB:
            subprocess.check_call(cmd, stdout=tB, text=True)
            os.remove(dirPrefix + segment + "_unsorted.bam")


if __name__ == "__main__":
    renameSubtypeFiles("HA")
    renameSubtypeFiles("NA")
    bamHeader = getBamHeaderFromAnyFile()
    vcfHeader = getVcfHeaderFromAnyFile()
    for segment in getMissingSegments():
        writeEmptyBam(segment, bamHeader)
        writeEmptyFasta(segment)
        writeEmptyVcf(segment, vcfHeader)
        writeEmptyAmendedFasta(segment)
    samtoolsSortAllBam()