Next changeset 1:6c82626a09e5 (2017-03-17) |
Commit message:
Uploaded |
added:
conifer3/c_calls.xml conifer3/c_plotcalls.xml conifer3/c_rpkm.xml conifer3/conifer.py conifer3/conifer_functions.py conifer3/conifer_wrapper.pl |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/c_calls.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/c_calls.xml Fri Mar 17 04:29:42 2017 -0400 |
b |
@@ -0,0 +1,48 @@ +<tool id="conifer_call" name="CoNIFER CNVs caller" version="1.0"> + <description></description> + <command> + python $__tool_directory__/conifer.py call + --input $input + --output temporary + --threshold $threshold + 2>&1 + && awk '{print \$2,\$3,\$4,\$5,\$1}' OFS="\t" temporary > $outputFile + </command> + <inputs> + <param format="hdf5" name="input" type="data" label="CoNIFER output analysis file (HDF5)" /> + <param name="threshold" type="float" value="1.5" label="Threshold" /> + </inputs> + <outputs> + <data format="bed" name="outputFile" label="${tool.name} on ${on_string}" /> + </outputs> + <help> +**What it does** + +This tool is a CNVs caller using SVD-ZRPKM thresholding. + +**License and citation** + +This Galaxy tool is Copyright © 2014 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +You can use this tool only if you agree to the license terms of: `CoNIFER`_. + +.. _CoNIFER: http://conifer.sourceforge.net/ + +If you use this tool, please cite: + +- |Cuccuru2014|_ +- |Krumm2012|_. + +.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929 +.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928 +.. |Krumm2012| replace:: Krumm, N., Sudmant, P. H., Ko, A., O'Roak, B. J., Malig, M., Coe, B. P., NHLBI Exome Sequencing Project, Quinlan, A. R., Nickerson, D. A., Eichler, E. E. (2012) Copy number variation detection and genotyping from exome sequence data. *Genome Res.* 22(8), 1525-1532 +.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525 + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btu135</citation> + <citation type="doi">10.1101/gr.138115.112</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/c_plotcalls.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/c_plotcalls.xml Fri Mar 17 04:29:42 2017 -0400 |
b |
@@ -0,0 +1,57 @@ +<tool id="conifer_plotcalls" name="CoNIFER plot caller" version="1.0"> + <description>cnv caller</description> + <command interpreter="perl"> + conifer_wrapper.pl --input $input --regions $plot_option.regions --html_file $html_file --html_folder $html_file.files_path + #if str($plot_option.plot_option_select) == "single" + --sample "$plot_option.sample" + #else + --window $plot_option.window + --multiple + #end if + 2>&1 + </command> + <inputs> + <param format="hdf5" name="input" type="data" label="CoNIFER output analysis file (hdf5)"/> + <conditional name="plot_option"> + <param name="plot_option_select" type="select" label="Single or multiple regions?"> + <option value="single" selected="true">Single region</option> + <option value="multiple">Multiple regions</option> + </param> + <when value="single"> + <param name="regions" type="text" size="50" optional="false" label="Region" help="Region for the plot (i.e. chr1:1000-2000)"/> + <param name="sample" type="text" size="50" optional="true" value="" label="Sample" help="Sample names to be highlighted in the plot (optional)"/> + </when> + <when value="multiple"> + <param format="tabular" name="regions" type="data" label="CoNIFER output calls file (tabular)"/> + <param name="window" size="10" type="integer" value="50" label="Window"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="html" name="html_file" label="${tool.name} on ${on_string}" /> + </outputs> +<help> +**What it does** + +Draw a HTML page with CNV plots. It accepts as input either a single region or a file with CoNIFER calls. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2014|_. + +.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics*, accepted +.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/early/2014/03/10/bioinformatics.btu135 + +This tool uses CoNIFER, which is licensed separately. Please cite: + +- |Krumm2012|_. + +.. |Krumm2012| replace:: (Krumm et al., 2012) Copy number variation detection and genotyping from exome sequence data. Genome research. +.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525.full?sid=4a7a300a-b960-4544-8611-effc3315411c + </help> +</tool> |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/c_rpkm.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/c_rpkm.xml Fri Mar 17 04:29:42 2017 -0400 |
b |
@@ -0,0 +1,82 @@ +<tool id="conifer_rpkm" name="CoNIFER RPKM and Analyze" version="1.0"> + <description></description> + <command> + mkdir rpkm_dir; +#for $input_bam in $inputs: + #if str($input_bam.label.value) != "": + ln -s ${input_bam.inputFile} ${$input_bam.label.value}.bam; + ln -s ${input_bam.inputFile.metadata.bam_index} ${$input_bam.label.value}.bam.bai; + python $__tool_directory__/conifer.py rpkm + --probes $probesFile + --input ${$input_bam.label.value}.bam + --output rpkm_dir/${$input_bam.label.value}.txt; + #else + ln -s ${input_bam.inputFile} ${input_bam.inputFile.dataset.name}.bam; + ln -s ${input_bam.inputFile.metadata.bam_index} ${input_bam.inputFile.dataset.name}.bam.bai; + python $__tool_directory__/conifer.py rpkm + --probes $probesFile + --input ${input_bam.inputFile.dataset.name}.bam + --output rpkm_dir/${input_bam.inputFile.dataset.name}.txt; + #end if +#end for + python $__tool_directory__/conifer.py analyze + --probes $probesFile + --rpkm_dir rpkm_dir + --output $outputFile + --svd $svd + 2>&1; + </command> + <inputs> + <param format="bed" name="probesFile" type="data" label="Target regions (BED)" /> + <repeat name="inputs" title="BAM" min="2" help="Need to add more files? Use controls below."> + <param format="bam" name="inputFile" type="data" label="BAM file" /> + <param name="label" type="text" size="30" value="" label="Label" help="Label to use in the output. If not given, the dataset name will be used instead"> + <validator type="regex" message="Spaces are not allowed">^\S*$</validator> + </param> + </repeat> + <param name="svd" type="integer" value="2" label="SVD" /> + </inputs> + <outputs> + <data format="hdf5" name="outputFile" label="${tool.name} on ${on_string}: hdf5" /> + </outputs> + <help> + +.. class:: warningmark + +**Warning about SVD** + +The SVD value must be less than the number of samples. + +E.g.: if the number of samples is 3, the value of SVD must be 2 or less. + +**What it does** + +This tool calculates RPKM for each BAM file and analyzes them, afterward it creates a unique HDF5 file containing all SVD-ZRPKM values, +probes and samples for downstream analysis with 'CoNIFER CNV caller' and 'CoNIFER plot'. + +**License and citation** + +This Galaxy tool is Copyright © 2015 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +You can use this tool only if you agree to the license terms of: `CoNIFER`_. + +.. _CoNIFER: http://conifer.sourceforge.net/ + +If you use this tool, please cite: + +- |Cuccuru2014|_ +- |Krumm2012|_. + +.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929 +.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928 +.. |Krumm2012| replace:: Krumm, N., Sudmant, P. H., Ko, A., O'Roak, B. J., Malig, M., Coe, B. P., NHLBI Exome Sequencing Project, Quinlan, A. R., Nickerson, D. A., Eichler, E. E. (2012) Copy number variation detection and genotyping from exome sequence data. *Genome Res.* 22(8), 1525-1532 +.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525 + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btu135</citation> + <citation type="doi">10.1101/gr.138115.112</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/conifer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/conifer.py Fri Mar 17 04:29:42 2017 -0400 |
[ |
b'@@ -0,0 +1,682 @@\n+#######################################################################\n+#######################################################################\n+# CoNIFER: Copy Number Inference From Exome Reads\n+# Developed by Niklas Krumm (C) 2012\n+# nkrumm@gmail.com\n+# \n+# homepage: http://conifer.sf.net\n+# This program is described in:\n+# Krumm et al. 2012. Genome Research. doi:10.1101/gr.138115.112\n+#\n+# This file is part of CoNIFER.\n+# CoNIFER is free software: you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation, either version 3 of the License, or\n+# (at your option) any later version.\n+# \n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+# \n+# You should have received a copy of the GNU General Public License\n+# along with this program. If not, see <http://www.gnu.org/licenses/>.\n+#######################################################################\n+#######################################################################\n+\n+import argparse\n+import os, sys, copy\n+import glob\n+import csv\n+import conifer_functions as cf\n+import operator\n+from tables import *\n+import numpy as np\n+\n+def CF_analyze(args):\n+\t# do path/file checks:\n+\ttry: \n+\t\t# read probes table\n+\t\tprobe_fn = str(args.probes[0])\n+\t\tprobes = cf.loadProbeList(probe_fn)\n+\t\tnum_probes = len(probes)\n+\t\tprint \'[INIT] Successfully read in %d probes from %s\' % (num_probes, probe_fn)\n+\texcept IOError as e: \n+\t\tprint \'[ERROR] Cannot read probes file: \', probe_fn\n+\t\tsys.exit(0)\n+\t\n+\ttry: \n+\t\tsvd_outfile_fn = str(args.output)\n+\t\th5file_out = openFile(svd_outfile_fn, mode=\'w\')\n+\t\tprobe_group = h5file_out.createGroup("/","probes","probes")\n+\texcept IOError as e: \n+\t\tprint \'[ERROR] Cannot open SVD output file for writing: \', svd_outfile_fn\n+\t\tsys.exit(0)\n+\t\n+\tif args.write_svals != "":\n+\t\tsval_f = open(args.write_svals,\'w\')\n+\t\n+\tif args.plot_scree != "":\n+\t\ttry:\n+\t\t\timport matplotlib\n+\t\t\tmatplotlib.use(\'Agg\')\n+\t\t\timport matplotlib.pyplot as plt\n+\t\t\timport pylab as P\n+\t\t\tfrom matplotlib.lines import Line2D\n+\t\t\tfrom matplotlib.patches import Rectangle\n+\t\texcept:\n+\t\t\tprint "[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?"\n+\t\t\tsys.exit(0)\n+\t\t\n+\t\tplt.gcf().clear()\n+\t\tfig = plt.figure(figsize=(10,5))\n+\t\tax = fig.add_subplot(111)\n+\t\n+\trpkm_dir = str(args.rpkm_dir[0])\n+\trpkm_files = glob.glob(rpkm_dir + "/*")\n+\tif len(rpkm_files) == 0:\n+\t\tprint \'[ERROR] Cannot find any files in RPKM directory (or directory path is incorrect): \', rpkm_dir\n+\t\tsys.exit(0)\n+\telif len(rpkm_files) == 1:\n+\t\tprint \'[ERROR] Found only 1 RPKM file (sample). CoNIFER requires multiple samples (8 or more) to run. Exiting.\'\n+\t\tsys.exit(0)\n+\telif len(rpkm_files) < 8:\n+\t\tprint \'[WARNING] Only found %d samples... this is less than the recommended minimum, and CoNIFER may not analyze this dataset correctly!\' % len(rpkm_files)\n+\telif len(rpkm_files) <= int(args.svd):\n+\t\tprint \'[ERROR] The number of SVD values specified (%d) must be less than the number of samples (%d). Either add more samples to the analysis or reduce the --svd parameter! Exiting.\' % (len(rpkm_files), int(args.svd))\n+\t\tsys.exit(0)\n+\telse:\n+\t\tprint \'[INIT] Found %d RPKM files in %s\' % (len(rpkm_files), rpkm_dir)\n+\t\n+\t# read in samples names and generate file list\n+\tsamples = {}\n+\tfor f in rpkm_files:\n+\t\ts = \'.\'.join(f.split(\'/\')[-1].split(\'.\')[0:-1])\n+\t\tprint "[INIT] Mapping file to sampleID: %s --> %s" % (f, s)\n+\t\tsamples[s] = f\n+\t\n+\t#check uniqueness and total # of samples\n+\tif len(set(samples)) != len(set(rpkm_files)):\n+\t\tprint \'[ERROR] Could not successfully derive sample names from RPKM filenames. There are probably non-unique sample names! Please rename files using <sampleID>.txt format!\'\n+\t\tsys'..b'PKM values")\n+analyze_parser.add_argument(\'--svd\', metavar=\'12\', type=int, nargs=\'?\', default = 12,help="Number of components to remove")\n+analyze_parser.add_argument(\'--min_rpkm\', metavar=\'1.00\', type=float, nargs="?", default = 1.00,help="Minimum population median RPKM per probe.")\n+analyze_parser.add_argument(\'--write_svals\', metavar=\'SingularValues.txt\', type=str, default= "", help="Optional file to write singular values (S-values). Used for Scree Plot.")\n+analyze_parser.add_argument(\'--plot_scree\', metavar=\'ScreePlot.png\', type=str, default= "", help="Optional graphical scree plot. Requires matplotlib.")\n+analyze_parser.add_argument(\'--write_sd\', metavar=\'StandardDeviations.txt\', type=str, default= "", help="Optional file with sample SVD-ZRPKM standard deviations. Used to filter noisy samples.")\n+analyze_parser.set_defaults(func=CF_analyze)\n+\n+# export command\n+export_parser= subparsers.add_parser(\'export\', help=\'Export SVD-ZRPKM values from a HDF5 file to bed or vcf format.\')\n+export_parser.add_argument(\'--input\',\'-i\',action=\'store\', required=True, metavar=\'CoNIFER_SVD.hdf5\',help="HDF5 file from CoNIFER \'analyze\' step")\n+export_parser.add_argument(\'--output\',\'-o\',action=\'store\', required=False, default=\'.\', metavar=\'output.bed\',help="Location of output file[s]. When exporting multiple samples, top-level directory of this path will be used.")\n+export_parser.add_argument(\'--sample\',\'-s\',action=\'store\', required=False, metavar=\'sampleID\', default=\'all\', nargs="+",help="SampleID or comma-separated list of sampleIDs to export. Default: export all samples")\n+export_parser.set_defaults(func=CF_export)\n+\n+# plot command\n+plot_parser= subparsers.add_parser(\'plot\', help=\'Plot SVD-ZRPKM values using matplotlib\')\n+plot_parser.add_argument(\'--input\',\'-i\',action=\'store\', required=True, metavar=\'CoNIFER_SVD.hdf5\',help="HDF5 file from CoNIFER \'analyze\' step")\n+plot_parser.add_argument(\'--region\',action=\'store\', required=True, metavar=\'chr#:start-stop\',help="Region to plot")\n+plot_parser.add_argument(\'--output\',action=\'store\', required=True, metavar=\'image.png\',help="Output path and filetype. PDF, PNG, PS, EPS, and SVG are supported.")\n+plot_parser.add_argument(\'--sample\',action=\'store\', required=False, metavar=\'sampleID\',nargs="+",default=\'none\',help="Sample[s] to highlight. The following optional color spec can be used: <color>:<sampleID>. Available colors are r,b,g,y,c,m,y,k. The unsmoothed SVD-ZRPKM values for the first sample in this list will be drawn. Default: No samples highlighted.")\n+plot_parser.set_defaults(func=CF_plot)\n+\n+# make calls command\n+call_parser= subparsers.add_parser(\'call\', help=\'Very rudimentary caller for CNVs using SVD-ZRPKM thresholding.\')\n+call_parser.add_argument(\'--input\',\'-i\',action=\'store\', required=True, metavar=\'CoNIFER_SVD.hdf5\',help="HDF5 file from CoNIFER \'analyze\' step")\n+call_parser.add_argument(\'--output\',action=\'store\', required=True, metavar=\'calls.txt\',help="Output file for calls")\n+call_parser.add_argument(\'--threshold\', metavar=\'1.50\', type=float, nargs=\'?\', required=False, default = 1.50,help="+/- Threshold for calling (minimum SVD-ZRPKM)")\n+call_parser.set_defaults(func=CF_call)\n+\n+# plotcalls command\n+plotcalls_parser= subparsers.add_parser(\'plotcalls\', help=\'Make basic plots from call file from "call" command.\')\n+plotcalls_parser.add_argument(\'--input\',\'-i\',action=\'store\', required=True, metavar=\'CoNIFER_SVD.hdf5\',help="HDF5 file from CoNIFER \'analyze\' step")\n+plotcalls_parser.add_argument(\'--calls\',action=\'store\', required=True, metavar=\'calls.txt\',help="File with calls from \'call\' command.")\n+plotcalls_parser.add_argument(\'--outputdir\',action=\'store\', required=True, metavar=\'/path/to/directory\',help="Output directory for plots")\n+plotcalls_parser.add_argument(\'--window\',action=\'store\', required=False, metavar=\'50\',default=50,help="In exons, the amount of padding to plot around each call")\n+plotcalls_parser.set_defaults(func=CF_plotcalls)\n+\n+args = parser.parse_args()\n+args.func(args)\n' |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/conifer_functions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/conifer_functions.py Fri Mar 17 04:29:42 2017 -0400 |
[ |
b'@@ -0,0 +1,370 @@\n+#######################################################################\n+#######################################################################\n+# CoNIFER: Copy Number Inference From Exome Reads\n+# Developed by Niklas Krumm (C) 2012\n+# nkrumm@gmail.com\n+# \n+# homepage: http://conifer.sf.net\n+# This program is described in:\n+# Krumm et al. 2012. Genome Research. doi:10.1101/gr.138115.112\n+#\n+# This file is part of CoNIFER.\n+# CoNIFER is free software: you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation, either version 3 of the License, or\n+# (at your option) any later version.\n+# \n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+# \n+# You should have received a copy of the GNU General Public License\n+# along with this program. If not, see <http://www.gnu.org/licenses/>.\n+#######################################################################\n+#######################################################################\n+\n+import csv\n+from tables import *\n+import numpy as np\n+import operator\n+\n+class rpkm_value(IsDescription):\n+\tprobeID = UInt32Col(pos=0)\n+\trpkm = FloatCol(pos=1)\n+\n+class probe(IsDescription):\n+\tprobeID = \tUInt32Col(pos=0)\n+\tstart = \tUInt32Col(pos=1) # start of probe\n+\tstop = \t\tUInt32Col(pos=2) # stop of probe\n+\tname = \t\tStringCol(20,pos=3) # 20-character String\n+\n+def chrInt2Str(chromosome_int):\n+\tif int(chromosome_int) == 23:\n+\t\treturn \'chrX\'\n+\telif int(chromosome_int) == 24:\n+\t\treturn \'chrY\' \n+\telse:\n+\t\treturn \'chr\' + str(chromosome_int)\n+\n+\n+def chrStr2Int(chromosome_str):\n+\tchr = chromosome_str.replace(\'chr\',\'\')\n+\tif chr == \'X\':\n+\t\treturn 23\n+\telif chr == \'Y\':\n+\t\treturn 24 \n+\telse:\n+\t\treturn int(chr)\n+\n+def parseLocString(locstr):\n+\ttry:\n+\t\tchr,locstr = locstr.split(":")\n+\t\tstart, stop = locstr.split("-")\n+\texcept:\n+\t\tchr, start, stop = locstr.split("\\t")\n+\t\n+\tchr = chrStr2Int(chr)\t\n+\tstart = int(start)\n+\tstop = int(stop)\n+\treturn (chr,start,stop)\t\n+\n+def zrpkm(rpkm,median,sd):\n+\treturn (rpkm - median) / sd\n+\n+\n+\n+class sample(IsDescription):\n+\tsampleID = \tStringCol(100,pos=0) # 20-char string (sampleID)\n+\n+def loadProbeList(CF_probe_filename):\n+\t# Load data files\n+\tprobefile = open(CF_probe_filename, \'rb\')\n+\ts = csv.Sniffer()\n+\theader = s.has_header(probefile.read(1024))\n+\tprobefile.seek(0)\n+\tdialect = s.sniff(probefile.read(1024))\n+\tprobefile.seek(0)\n+\tif header:\n+\t\tr = csv.DictReader(probefile, dialect=dialect)\n+\telse:\n+\t\tr = csv.DictReader(probefile, dialect=dialect, fieldnames=[\'chr\',\'start\',\'stop\',\'name\'])\n+\t\n+\tprobes = []\n+\tprobeID = 1\n+\tfor row in r:\n+\t\tprobes.append({\'probeID\': probeID, \'chr\':chrStr2Int(row[\'chr\']),\'start\':int(row[\'start\']),\'stop\':int(row[\'stop\']), \'name\':row[\'name\']})\n+\t\tprobeID +=1\n+\t\n+\tif len(probes) == 0:\n+\t\traise Exception("No probes in probe file")\n+\t\t\n+\treturn probes\n+\n+\n+def export_sample(h5file_in,sample,probes,outfile_f):\n+\tdt = np.dtype([(\'chr\',\'|S10\'),(\'start\', \'<u4\'), (\'stop\', \'<u4\'), (\'name\', \'|S20\'),(\'SVDZRPKM\',np.float)])\n+\tfor chr in h5file_in.root:\n+\t\tif chr._v_title in (\'probes\',\'samples\'):\n+\t\t\tcontinue\n+\t\t\n+\t\tout_data = np.empty(len(probes[chr._v_title]),dtype=dt)\n+\t\tout_data["SVDZRPKM"] = chr._f_getChild("sample_" + sample).read(field=\'rpkm\')\n+\t\tout_data["chr"] = np.repeat(chr._v_title,len(out_data))\n+\t\tout_data["start"] = probes[chr._v_title]["start"]\n+\t\tout_data["stop"] = probes[chr._v_title]["stop"]\n+\t\tout_data["name"] = probes[chr._v_title]["name"]\n+\t\tnp.savetxt(outfile_f, out_data,fmt=["%s","%d","%d","%s","%f"], delimiter="\\t")\n+\n+\n+\n+def plotGenes(axis, rpkm_data, levels=5,x_pos=-2,text_pos=\'right\',line_spacing=0.1,text_offset=0.25,data_range=None):\n+\tfrom matplotlib.lines import Line2D\n+\tcounter = 0\n+\tprev_gene = ""\n+\tif data_range is not None:\n+\t\texon_s'..b'e = self.h5file.root.samples.samples\n+\t\t\n+\tdef __del__(self):\n+\t\tself.h5file.close()\n+\t\n+\tdef getExonValuesByExons(self, chromosome, start_exon, stop_exon, sampleList=None,genotype=False):\n+\t\t\n+\t\tprobe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))\n+\t\t#table_rows = probe_tbl.getWhereList(\'(start >= %d) & (stop <= %d)\' % (start,stop))\n+\t\tstart_exon = max(start_exon,0)\n+\t\tstop_exon = min(stop_exon, probe_tbl.nrows)\n+\t\t#print start_exon, stop_exon\n+\t\ttable_rows = np.arange(start_exon,stop_exon,1)\n+\t\tdata_tbl = self.h5file.root._f_getChild("chr" + str(chromosome))\n+\t\t\n+\t\tif sampleList == None:\n+\t\t\tnum_samples = data_tbl._v_nchildren\n+\t\t\tsamples = data_tbl\t\n+\t\telse:\n+\t\t\tnum_samples = len(sampleList)\n+\t\t\tsamples = [data_tbl._f_getChild("sample_" + s) for s in sampleList]\n+\t\t\n+\t\tdata = np.empty([num_samples,len(table_rows)],dtype=np.float)\n+\t\t\n+\t\tout_sample_list = []\n+\t\tcnt = 0\n+\t\tfor sample_tbl in samples:\n+\t\t\td = sample_tbl.readCoordinates(table_rows,field="rpkm")\n+\t\t\tdata[cnt,:] = d\n+\t\t\tcnt +=1\n+\t\t\tout_sample_list.append(sample_tbl.title)\n+\t\t\n+\t\td = rpkm_data()\n+\t\tif genotype: # return average #todo-- implement median and SD?\n+\t\t\td.rpkm = data.transpose().mean(axis=0)\n+\t\t\td.isGenotype = True\n+\t\telse: #return all data points\n+\t\t\td.rpkm = data.transpose()\n+\t\td.samples = out_sample_list\n+\t\td.exons = probe_tbl.readCoordinates(table_rows)\n+\t\t\n+\t\treturn d\n+\t\n+\tdef getExonValuesByRegion(self, chromosome, start=None, stop=None, sampleList=None,genotype=False):\n+\t\tprobe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))\n+\t\tif (start is not None) and (stop is not None):\n+\t\t\ttable_rows = probe_tbl.getWhereList(\'(start >= %d) & (stop <= %d)\' % (start,stop))\n+\t\telse:\n+\t\t\ttable_rows = probe_tbl.getWhereList(\'(start >= 0) & (stop <= 1000000000)\')\n+\t\t\n+\t\tdata_tbl = self.h5file.root._f_getChild("chr" + str(chromosome))\n+\t\t\n+\t\tif sampleList == None:\n+\t\t\tnum_samples = data_tbl._v_nchildren\n+\t\t\tsamples = data_tbl\t\n+\t\telse:\n+\t\t\tnum_samples = len(sampleList)\n+\t\t\tsamples = [data_tbl._f_getChild("sample_" + s) for s in sampleList]\n+\t\t\n+\t\tdata = np.empty([num_samples,len(table_rows)],dtype=np.float)\n+\t\t\n+\t\tout_sample_list = []\n+\t\tcnt = 0\n+\t\tfor sample_tbl in samples:\n+\t\t\td = sample_tbl.readCoordinates(table_rows,field="rpkm")\n+\t\t\tdata[cnt,:] = d\n+\t\t\tcnt +=1\n+\t\t\tout_sample_list.append(sample_tbl.title)\n+\t\t\n+\t\td = rpkm_data()\n+\t\tif genotype: # return average #todo-- implement median and SD?\n+\t\t\td.rpkm = data.transpose().mean(axis=0)\n+\t\t\td.isGenotype = True\n+\t\telse: #return all data points\n+\t\t\td.rpkm = data.transpose()\n+\t\td.samples = out_sample_list\n+\t\td.exons = probe_tbl.readCoordinates(table_rows)\n+\t\t\n+\t\treturn d\n+\t\n+\tdef getSampleList(self,cohort=None,sex=None,ethnicity=None,custom=None):\n+\t\t"""Return a list of available samples in the current data file. Specifying no arguments will return all available samples"""\n+\t\t\n+\t\treadWhereStr = ""\n+\t\tif custom != None:\n+\t\t\treadWhereStr = custom\n+\t\telse:\n+\t\t\tif cohort != None:\n+\t\t\t\tif isinstance(cohort,list):\n+\t\t\t\t\tfor c in cohort:\n+\t\t\t\t\t\treadWhereStr += "(cohort==\'%s\') | " % c\n+\t\t\t\t\treadWhereStr = readWhereStr.strip(" |")\n+\t\t\t\t\treadWhereStr += " & "\n+\t\t\t\telse:\n+\t\t\t\t\treadWhereStr += "(cohort==\'%s\') " % cohort\n+\t\t\tif sex != None:\n+\t\t\t\tif sex not in [\'M\',\'F\']:\t\n+\t\t\t\t\tsex = sex.upper()[0]\n+\t\t\t\treadWhereStr += " (sex==\'%s\') &" % sex\n+\t\t\tif ethnicity != None:\n+\t\t\t\treadWhereStr += " (ethnicity==\'%s\') &" % ethnicity\n+\t\t\t\n+\t\t\treadWhereStr = readWhereStr.strip(" &") # remove leading or trailing characters\n+\t\tif readWhereStr != "":\n+\t\t\t#print readWhereStr\n+\t\t\tsampleIDs = self.sample_table.readWhere(readWhereStr,field=\'sampleID\')\n+\t\telse:\n+\t\t\tsampleIDs = self.sample_table.read(field=\'sampleID\')\n+\t\t\n+\t\treturn sampleIDs\n+\t\n+\tdef getExonIDs(self, chromosome, start, stop):\n+\t\tprobe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))\n+\t\texons = probe_tbl.getWhereList(\'(start >= %d) & (stop <= %d)\' % (start,stop))\n+\t\treturn exons\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 496c4b45765d conifer3/conifer_wrapper.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conifer3/conifer_wrapper.pl Fri Mar 17 04:29:42 2017 -0400 |
[ |
@@ -0,0 +1,51 @@ +#!/usr/bin/env perl + +# Execute CoNIFER plotcalls and +# returns a HTML page with links to PNG plots + +use strict; +use warnings; +use Getopt::Long; +use File::Basename; +use File::Path qw(make_path remove_tree); + +my $command; +my $dir=$ENV{'CONIFER_PATH'}; + +our ($multiple, $input, $regions, $sample, $window, $html_file, $html_folder, $verbose); + +GetOptions('multiple'=>\$multiple, 'input=s'=>\$input, 'regions=s'=>\$regions, + 'sample:s'=>\$sample, 'window:i'=>\$window, 'verbose'=>\$verbose, + 'html_file=s'=>\$html_file, 'html_folder=s'=>\$html_folder); + +make_path($html_folder); + +# Build command +if ($multiple){ + # Reformat file with regions as required by CoNIFER plotcalls + system("awk '{print \$5,\$1,\$2,\$3,\$4}' OFS=\"\t\" $regions > regions_sorted"); + + $command = "python ".$dir."/conifer.py plotcalls --input $input --calls regions_sorted --window $window --outputdir $html_folder 2>&1"; +}else{ + my $sample_command = ($sample eq "") ? "" : "--sample $sample"; + my $plot_name = $regions; + $plot_name =~ s/[:-]/_/g; + $command = "python ".$dir."/conifer.py plot --input $input --region $regions $sample_command --output $html_folder/$plot_name.png 2>&1"; +} + +# Run CoNIFER +system($command); +$verbose and print $command,"\n"; + +# Write HTML file +open(HTML, ">$html_file"); +print HTML "<html><head><title>CoNIFER: Copy Number Analysis for Targeted Resequencing</title></head><body><h3>CoNIFER Output Files:</h3><p><ul>\n"; +opendir(DIR, $html_folder); + +my @FILES= grep { /png$/ } readdir(DIR); +closedir(DIR); +foreach my $file (@FILES) { + print HTML "<li><a href=$file>$file</a><img src=\"$file\" height=\"50\" width=\"100\"></li>\n"; +} +print HTML "</ul></p></body></html>\n"; +close(HTML); |