Mercurial > repos > devteam > fastqc
annotate rgFastQC.py @ 8:5b995cef2fbb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
| author | iuc |
|---|---|
| date | Tue, 31 Jan 2017 07:22:22 -0500 |
| parents | ec73b7c83b2c |
| children | 0a7c65540937 |
| rev | line source |
|---|---|
| 0 | 1 """ |
| 1 | 2 Rewrite of rgFastQC.py for Version 0.11.2 of FastQC. |
| 3 | |
| 4 Changes implemented from tmcgowan at | |
| 5 https://testtoolshed.g2.bx.psu.edu/view/tmcgowan/fastqc | |
| 6 and iuc at https://toolshed.g2.bx.psu.edu/view/iuc/fastqc | |
| 7 with minor changes and bug fixes | |
| 0 | 8 |
| 1 | 9 SYNOPSIS |
| 10 | |
| 11 rgFastQC.py -i input_file -j input_file.name -o output_html_file [-t output_text_file] [-d output_directory] | |
| 12 [-f fastq|bam|sam] [-n job_name] [-c contaminant_file] [-l limits_file] [-e fastqc_executable] | |
| 0 | 13 |
| 1 | 14 EXAMPLE (generated by Galaxy) |
| 0 | 15 |
| 1 | 16 rgFastQC.py -i path/dataset_1.dat -j 1000gsample.fastq -o path/dataset_3.dat -d path/job_working_directory/subfolder |
| 17 -f fastq -n FastQC -c path/dataset_2.dat -e fastqc | |
| 0 | 18 """ |
|
8
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
19 import bz2 |
|
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
20 import glob |
|
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
21 import gzip |
|
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
22 import mimetypes |
|
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
23 import optparse |
|
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
24 import os |
| 0 | 25 import re |
| 1 | 26 import shutil |
| 0 | 27 import subprocess |
| 28 import tempfile | |
| 29 import zipfile | |
|
8
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
30 |
| 0 | 31 |
class FastQCRunner(object):
    '''
    Builds and runs a FastQC command line for one input file and copies the
    resulting report files to the requested output locations.

    The options object must provide the attributes read below: input,
    inputfilename, informat, outputdir, htmloutput, textoutput, contaminants,
    limits and executable (the option parser at the bottom of this script
    produces such an object).
    '''

    def __init__(self, opts=None):
        '''
        Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc()

        Raises ValueError when no options object is given.
        '''
        # An explicit raise instead of `assert`: asserts vanish under `python -O`
        if opts is None:
            raise ValueError("FastQCRunner requires an options object")
        self.opts = opts

    @staticmethod
    def _first_line_readable(handle):
        '''
        Tries to read one line from an already opened (de)compressing file
        object, always closes it, and reports whether the read succeeded.
        '''
        try:
            handle.readline()
        except Exception:
            # Any decoding failure means the content is not in the format the
            # file name claims; KeyboardInterrupt/SystemExit still propagate.
            return False
        finally:
            handle.close()
        return True

    @staticmethod
    def _first_match(pattern):
        '''
        Returns the first path matching the glob pattern.

        Raises IndexError (the error an unguarded `glob(...)[0]` would give)
        with a message naming the pattern when nothing matches.
        '''
        matches = glob.glob(pattern)
        if not matches:
            raise IndexError("FastQC produced no output matching %s" % pattern)
        return matches[0]

    @staticmethod
    def _copy_file(source, destination):
        '''
        Copies source to destination as raw bytes.
        '''
        with open(source, 'rb') as fsrc:
            with open(destination, 'wb') as fdest:
                shutil.copyfileobj(fsrc, fdest)

    def prepare_command_line(self):
        '''
        Develops the Commandline to run FastQC in Galaxy

        Sets self.fastqinfilename (sanitized name for the symlink handed to
        FastQC) and self.command_line (the full shell command as one string).
        May rewrite self.opts.informat to 'fastq'.

        Raises Exception when a file with a compression suffix is neither
        valid compressed data nor readable as a plain file.
        '''

        # Check whether a given file compression format is valid
        # This prevents uncompression of already uncompressed files
        infname = self.opts.inputfilename
        linf = infname.lower()
        # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf
        # patched may 29 2013 until this is fixed properly
        # guess_type() returns (mime type, encoding); only the encoding is used
        encoding = mimetypes.guess_type(self.opts.input)[-1]
        is_gzip_path = encoding == "gzip"

        trimext = False
        if linf.endswith('.gz') or linf.endswith('.gzip') or is_gzip_path:
            trimext = not self._first_line_readable(gzip.open(self.opts.input))
        elif linf.endswith('bz2'):
            trimext = not self._first_line_readable(bz2.BZ2File(self.opts.input, 'r'))
        elif linf.endswith('.zip'):
            trimext = not zipfile.is_zipfile(self.opts.input)

        if trimext:
            # The suffix lied about the content: make sure the file is at
            # least readable as-is, then drop the misleading suffix.
            with open(self.opts.input) as plain:
                try:
                    plain.readline()
                except Exception:
                    raise Exception("Input file corruption, could not identify the filetype")
            infname = os.path.splitext(infname)[0]

        # Replace unwanted or problematic characters in the input file name
        self.fastqinfilename = re.sub(r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
        # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise
        if 'fastq' in self.opts.informat:
            # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's
            # accepted formats..
            self.opts.informat = 'fastq'
        elif not self.fastqinfilename.endswith(self.opts.informat):
            self.fastqinfilename += '.%s' % self.opts.informat

        # Build the Commandline from the given parameters
        # (the executable comes from this instance's options, not from a
        # module-level variable, so the class also works when imported)
        command_line = [self.opts.executable, '--outdir %s' % self.opts.outputdir]
        if self.opts.contaminants is not None:
            command_line.append('--contaminants %s' % self.opts.contaminants)
        if self.opts.limits is not None:
            command_line.append('--limits %s' % self.opts.limits)
        command_line.append('--quiet')
        command_line.append('--extract')  # to access the output text file
        if is_gzip_path:
            # no explicit format is passed here; the link gets a .gz ending instead
            self.fastqinfilename += ".gz"
        else:
            command_line.append('-f %s' % self.opts.informat)
        command_line.append(self.fastqinfilename)
        self.command_line = ' '.join(command_line)

    def copy_output_file_to_dataset(self):
        '''
        Retrieves the output html and text files from the output directory and copies them to the Galaxy output files

        The text file is only copied when a text output path was given.
        Raises IndexError when an expected FastQC output file is missing.
        '''

        # retrieve html file
        html_report = self._first_match(os.path.join(self.opts.outputdir, '*html'))
        self._copy_file(html_report, self.opts.htmloutput)

        # retrieve text file (textoutput is optional and defaults to None)
        if self.opts.textoutput is not None:
            text_report = self._first_match(os.path.join(self.opts.outputdir, '*', 'fastqc_data.txt'))
            self._copy_file(text_report, self.opts.textoutput)

    def run_fastqc(self):
        '''
        Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options

        Writes a progress log into the output directory, symlinks the input
        under its sanitized name in the current working directory, runs the
        command and copies the results. Raises subprocess.CalledProcessError
        when the command exits with a non-zero status.
        '''

        # Create a log file; reuse the descriptor mkstemp already opened so it
        # is not leaked, and let `with` close the log even when a step fails
        log_fd, tlog = tempfile.mkstemp(prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
        with os.fdopen(log_fd, 'w') as sout:
            self.prepare_command_line()
            sout.write(self.command_line)
            sout.write('\n')
            sout.write("Creating symlink\n")  # between the input (.dat) file and the given input file name
            os.symlink(self.opts.input, self.fastqinfilename)
            sout.write("check_call\n")
            # NOTE(review): the command is a shell string; the input file name
            # is sanitized above, but executable, outputdir, contaminants and
            # limits are interpolated unquoted - they must come from a trusted caller.
            subprocess.check_call(self.command_line, shell=True)
            sout.write("Copying working %s file to %s \n" % (self.fastqinfilename, self.opts.htmloutput))
            self.copy_output_file_to_dataset()
            sout.write("Finished")
| 142 | |
|
8
5b995cef2fbb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit bff04a5409662a8968c50501487042a3a0c93702
iuc
parents:
7
diff
changeset
|
143 |
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the mandatory
    # ones, make sure the output directory exists and run FastQC.
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-j', '--inputfilename', default=None)
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-t', '--textoutput', default=None)
    op.add_option('-d', '--outputdir', default="/tmp/shortread")
    op.add_option('-f', '--informat', default='fastq')
    op.add_option('-n', '--namejob', default='rgFastQC')
    op.add_option('-c', '--contaminants', default=None)
    op.add_option('-l', '--limits', default=None)
    op.add_option('-e', '--executable', default='fastqc')
    # `opts` is deliberately kept under this module-level name in case other
    # code in this file reads it.
    opts, args = op.parse_args()

    # Explicit checks instead of `assert`, which is stripped under `python -O`;
    # op.error() prints the usage message and exits with status 2.
    for required in ('input', 'inputfilename', 'htmloutput'):
        if getattr(opts, required) is None:
            op.error("missing required option --%s" % required)

    # Create the output directory without a check-then-create race: only
    # re-raise when the directory really is not there afterwards.
    try:
        os.makedirs(opts.outputdir)
    except OSError:
        if not os.path.isdir(opts.outputdir):
            raise

    fastqc_runner = FastQCRunner(opts)
    fastqc_runner.run_fastqc()
