Mercurial > repos > devteam > fastqc
comparison rgFastQC.py @ 6:e8c90ad3cbf9 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastqc commit df4c0b0c6372e2984966e220fa42ecd8a3d370e8
| author | devteam |
|---|---|
| date | Mon, 31 Oct 2016 10:40:00 -0400 |
| parents | ca0eb9b89b32 |
| children | ec73b7c83b2c |
Comparison legend: equal, deleted, inserted, replaced
| 5:93f27bdc08cd | 6:e8c90ad3cbf9 |
|---|---|
| 28 import gzip | 28 import gzip |
| 29 import bz2 | 29 import bz2 |
| 30 import zipfile | 30 import zipfile |
| 31 | 31 |
| 32 class FastQCRunner(object): | 32 class FastQCRunner(object): |
| 33 | 33 |
| 34 def __init__(self,opts=None): | 34 def __init__(self,opts=None): |
| 35 ''' | 35 ''' |
| 36 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc() | 36 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc() |
| 37 ''' | 37 ''' |
| 38 | 38 |
| 39 # Check whether the options are specified and saves them into the object | 39 # Check whether the options are specified and saves them into the object |
| 40 assert opts != None | 40 assert opts != None |
| 41 self.opts = opts | 41 self.opts = opts |
| 42 | 42 |
| 43 def prepare_command_line(self): | 43 def prepare_command_line(self): |
| 44 ''' | 44 ''' |
| 45 Develops the Commandline to run FastQC in Galaxy | 45 Develops the Commandline to run FastQC in Galaxy |
| 46 ''' | 46 ''' |
| 47 | 47 |
| 48 # Check whether a given file compression format is valid | 48 # Check whether a given file compression format is valid |
| 49 # This prevents uncompression of already uncompressed files | 49 # This prevents uncompression of already uncompressed files |
| 50 infname = self.opts.inputfilename | 50 infname = self.opts.inputfilename |
| 51 linf = infname.lower() | 51 linf = infname.lower() |
| 52 trimext = False | 52 trimext = False |
| 53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf | 53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf |
| 54 # patched may 29 2013 until this is fixed properly | 54 # patched may 29 2013 until this is fixed properly |
| 55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ): | 55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ): |
| 56 f = gzip.open(self.opts.input) | 56 f = gzip.open(self.opts.input) |
| 57 try: | 57 try: |
| 58 f.readline() | 58 f.readline() |
| 59 except: | 59 except: |
| 60 trimext = True | 60 trimext = True |
| 74 try: | 74 try: |
| 75 f.readline() | 75 f.readline() |
| 76 except: | 76 except: |
| 77 raise Exception("Input file corruption, could not identify the filetype") | 77 raise Exception("Input file corruption, could not identify the filetype") |
| 78 infname = os.path.splitext(infname)[0] | 78 infname = os.path.splitext(infname)[0] |
| 79 | 79 |
| 80 # Replace unwanted or problematic charaters in the input file name | 80 # Replace unwanted or problematic charaters in the input file name |
| 81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) | 81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) |
| 82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise | 82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise |
| 83 if 'fastq' in opts.informat: | 83 if 'fastq' in opts.informat: |
| 84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's | 84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's |
| 85 # accepted formats.. | 85 # accepted formats.. |
| 86 opts.informat = 'fastq' | 86 opts.informat = 'fastq' |
| 87 elif not self.fastqinfilename.endswith(opts.informat): | 87 elif not self.fastqinfilename.endswith(opts.informat): |
| 88 self.fastqinfilename += '.%s' % opts.informat | 88 self.fastqinfilename += '.%s' % opts.informat |
| 89 | 89 |
| 101 | 101 |
| 102 def copy_output_file_to_dataset(self): | 102 def copy_output_file_to_dataset(self): |
| 103 ''' | 103 ''' |
| 104 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files | 104 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files |
| 105 ''' | 105 ''' |
| 106 | 106 |
| 107 # retrieve html file | 107 # retrieve html file |
| 108 result_file = glob.glob(opts.outputdir + '/*html') | 108 result_file = glob.glob(opts.outputdir + '/*html') |
| 109 with open(result_file[0], 'rb') as fsrc: | 109 with open(result_file[0], 'rb') as fsrc: |
| 110 with open(self.opts.htmloutput, 'wb') as fdest: | 110 with open(self.opts.htmloutput, 'wb') as fdest: |
| 111 shutil.copyfileobj(fsrc, fdest) | 111 shutil.copyfileobj(fsrc, fdest) |
| 112 | 112 |
| 113 # retrieve text file | 113 # retrieve text file |
| 114 text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt') | 114 text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt') |
| 115 with open(text_file[0], 'rb') as fsrc: | 115 with open(text_file[0], 'rb') as fsrc: |
| 116 with open(self.opts.textoutput, 'wb') as fdest: | 116 with open(self.opts.textoutput, 'wb') as fdest: |
| 117 shutil.copyfileobj(fsrc, fdest) | 117 shutil.copyfileobj(fsrc, fdest) |
| 118 | 118 |
| 119 def run_fastqc(self): | 119 def run_fastqc(self): |
| 120 ''' | 120 ''' |
| 121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) | 121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) |
| 122 ''' | 122 ''' |
| 123 | 123 |
| 124 # Create a log file | 124 # Create a log file |
| 125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) | 125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) |
| 126 sout = open(tlog, 'w') | 126 sout = open(tlog, 'w') |
| 127 | 127 |
| 128 self.prepare_command_line() | 128 self.prepare_command_line() |
| 129 sout.write(self.command_line) | 129 sout.write(self.command_line) |
| 130 sout.write('\n') | 130 sout.write('\n') |
| 131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name | 131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name |
| 132 os.symlink(self.opts.input, self.fastqinfilename) | 132 os.symlink(self.opts.input, self.fastqinfilename) |
| 148 op.add_option('-n', '--namejob', default='rgFastQC') | 148 op.add_option('-n', '--namejob', default='rgFastQC') |
| 149 op.add_option('-c', '--contaminants', default=None) | 149 op.add_option('-c', '--contaminants', default=None) |
| 150 op.add_option('-l', '--limits', default=None) | 150 op.add_option('-l', '--limits', default=None) |
| 151 op.add_option('-e', '--executable', default='fastqc') | 151 op.add_option('-e', '--executable', default='fastqc') |
| 152 opts, args = op.parse_args() | 152 opts, args = op.parse_args() |
| 153 | 153 |
| 154 assert opts.input != None | 154 assert opts.input != None |
| 155 assert opts.inputfilename != None | 155 assert opts.inputfilename != None |
| 156 assert opts.htmloutput != None | 156 assert opts.htmloutput != None |
| 157 assert os.path.isfile(opts.executable),'##rgFastQC.py error - cannot find executable %s' % opts.executable | 157 if not os.path.exists(opts.outputdir): |
| 158 if not os.path.exists(opts.outputdir): | |
| 159 os.makedirs(opts.outputdir) | 158 os.makedirs(opts.outputdir) |
| 160 | 159 |
| 161 fastqc_runner = FastQCRunner(opts) | 160 fastqc_runner = FastQCRunner(opts) |
| 162 fastqc_runner.run_fastqc() | 161 fastqc_runner.run_fastqc() |
