Mercurial > repos > yating-l > ucsc_blat
diff blat.xml @ 5:70d7377d5e24 draft
planemo upload commit 7856c637db5bd4ea0b8b4db63e242618421a9cc6-dirty
| author | yating-l |
|---|---|
| date | Wed, 01 Feb 2017 17:16:02 -0500 |
| parents | 9e56efe1c371 |
| children | 6f06b6d68c0b |
line wrap: on
line diff
--- a/blat.xml Tue Jan 31 18:31:42 2017 -0500 +++ b/blat.xml Wed Feb 01 17:16:02 2017 -0500 @@ -1,5 +1,6 @@ +<?xml version="1.0"?> <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.0"> - <description>Rapidly align sequences to the genome</description> + <description>Standalone blat sequence search command line tool</description> <requirements> <requirement type="package" version="1.0">ucsc_tools_340_for_BLAT</requirement> </requirements> @@ -13,8 +14,46 @@ -mask=$mask '${database}' '${query}' - '${output}' - + output + && sort -k 10,10 -k 12,12n output > '${output_sorted}' + && pslReps -minAli=0.25 '${output_sorted}' output.reps.psl output.reps.psr + && faPolyASizes '${query}' query.polyA + #if $filter_param.filter =="yes" + && pslCDnaFilter + #if $filter_param.assembly_type == "native" + -localNearBest=0.001 + #if $filter_param.assembly_category == "finished" + -minId=0.95 + -minCover=0.25 + #else if $filter_param.assembly_category == "well-ordered" + -minId=0.95 + -minCover=0.15 + #else + -minId=0.94 + -minAlnSize=80 + #end if + #else + -localNearBest=0.010 + #if $filter_param.assembly_category == "finished" + -minId=0.35 + -minCover=0.25 + #else if $filter_param.assembly_category == "well-ordered" + -minId=0.35 + -minCover=0.15 + #else + -minId=0.33 + -minAlnSize=80 + #end if + #end if + -minQSize=20 + -ignoreIntrons + -repsAsMatch + -ignoreNs + -bestOverlap + -polyASizes=query.polyA + output.reps.psl + '${output_filtered}' + #end if ]]></command> <inputs> <param type="data" name="database" format="fasta" /> @@ -40,93 +79,27 @@ <option value="out">out - mask according to database.out RepeatMasker .out file</option> <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> </param> - <!--<conditional name="database" format="fasta"> - <param type="select" name="database_type" format="text" multiple="false" label="database type" help="Choose your database type, the default is dna"> - <option value="dna">DNA 
sequence</option> - <option value="prot">protein sequence</option> - <option value="dnax">DNA sequence translated in six frames to protein</option> + <conditional name="filter_param"> + <param name="filter" type="select" label="Filter BLAT results with pslCDnaFilter"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> </param> - <when value="dna"> - <param type="integer" name="tileSize" value="11" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param> - <param name="minMatch" type="integer" value="2" label="Sets the number of tile matches. Usually set from 2 to 4. - Default is 2 for nucleotide, 1 for protein.">-minMatch</param> - <param name="minIdentity" type="integer" value="90" label="Sets minimum sequence identity (in percent). Default is - 90 for nucleotide searches, 25 for protein or translated - protein searches.">-minIdentity</param> - - </when> - <when value="prot"> - <param type="integer" name="tileSize" value="5" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param> - <param name="minMatch" type="integer" value="1" label="Sets the number of tile matches. Usually set from 2 to 4. - Default is 2 for nucleotide, 1 for protein.">-minMatch</param> - <param name="minIdentity" type="integer" value="25" label="Sets minimum sequence identity (in percent). 
Default is - 90 for nucleotide searches, 25 for protein or translated - protein searches.">-minIdentity</param> - </when> - </conditional> - <param type="select" name="query_type" format="text" multiple="false" label="query type" help="Choose your query type, the default is dna"> - <option value="dna">DNA sequence</option> - <option value="rna">RNA sequence</option> - <option value="prot">protein sequence</option> - <option value="dnax">DNA sequence translated in six frames to protein</option> - <option value="rnax">DNA sequence translated in three frames to protein</option> - </param> - <conditional name="settings"> - <param name="advanced" type="select" multiple="false" label="Specify advanced parameters"> - <option value="simple" selected="true">No, use program defaults. </option> - <option value="advanced">Yes, see full parameter list.</option> - </param> - <when value="advanced"> - <param name="mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region - but may extend through it in nucleotide searches. 
Masked areas - are ignored entirely in protein or translated searches."> - <option value="lower">lower - mask out lower-cased sequence</option> - <option value="upper">upper - mask out upper-cased sequence</option> - <option value="out">out - mask according to database.out RepeatMasker .out file</option> - <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> - </param> - <param name="qmask" type="select" label="Mask out repeats in query sequence" help="Similar to -mask above, but - for query rather than target sequence."> - <option value="lower">lower - mask out lower-cased sequence</option> - <option value="upper">upper - mask out upper-cased sequence</option> - <option value="out">out - mask according to database.out RepeatMasker .out file</option> - <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> - </param> - <param name="oneOff" type="integer" value="0" label="If set to 1, this allows one mismatch in tile and still triggers an alignment. Default is 0.">-oneOff</param> - <param name="minScore" type="integer" value="30" label="Sets minimum score. This is the matches minus the - mismatches minus some sort of gap penalty. Default is 30.">-minScore</param> - <param name="maxGap" type="integer" value="2" label="Sets the size of maximum gap between tiles in a clump. Usually - set from 0 to 3. Default is 2. Only relevent for minMatch > 1.">-maxGap</param> - <param name="minRepDivergence" type="integer" value="15" min="0" max="100" label="Minimum percent divergence of repeats to allow - them to be unmasked. Default is 15. Only relevant for - masking using RepeatMasker .out files.">-minRepDivergence</param> - <param name="noHead" type="boolean" value="false" label="Suppresses .psl header (so it's just a tab-separated file)." /> - <param name="dots" type="integer" value="0" label="Output dot every N sequences to show program's progress." 
/> - <param name="trimT" type="boolean" value="false" label="Trim leading poly-T." /> - <param name="trimHardA" type="boolean" value="false" label="Remove poly-A tail from qSize as well as alignments in - psl output." /> - <param name="fastMap" type="boolean" value="false" label="Run for fast DNA/DNA remapping - not allowing introns, - requiring high %ID. Query sizes must not exceed 5000." /> - <param name="fine" type="boolean" value="false" label="For high-quality mRNAs, look harder for small initial and - terminal exons. Not recommended for ESTs." /> - <param name="out" type="select" label="Output file format"> - <option value="psl" selected="true">psl - Default. Tab-separated format, no sequence</option> - <option value="pslx">pslx - Tab-separated format with sequence</option> - <option value="axt">axt - blastz-associated axt format</option> - <option value="maf">maf - multiz-associated maf format</option> - <option value="sim4">sim4 - similar to sim4 format</option> - <option value="wublast">wublast - similar to wublast format</option> - <option value="blast">blast - similar to NCBI blast format</option> - <option value="blast8">blast8- NCBI blast tabular format</option> - <option value="blast9">blast9 - NCBI blast tabular format with comments</option> - </param> - <param name="maxIntro" type="integer" value="750000" label="Sets maximum intron size. Default is 750000." /> - <param name="extendThroughN" type="boolean" value="false" label="Allows extension of alignment through large blocks of Ns." 
/> - </when> - </conditional>--> + <when value="yes"> + <param name="assembly_type" type="select" label="Choose your type of cDNA sequence"> + <option value="native">Same species</option> + <option value="xeno">Across species</option> + </param> + <param name="assembly_category" type="select" label="Choose your genome assembly category"> + <option value="finished">finished assemblies (high quality)</option> + <option value="well-ordered">well-ordered assemblies (well ordered, whole genome shotgun)</option> + <option value="low-coverage">low-coverage assemblies (low coverage (&lt; 4x), lots of contigs, N50 scaffold size &lt; 1mb) </option> + </param> + </when> + </conditional> </inputs> <outputs> - <data format="psl" name="output"></data> + <data format="psl" name="output_sorted"></data> + <data format="psl" name="output_filtered"></data> </outputs> <tests> <test> @@ -136,9 +109,64 @@ <param name="query_type" value="rnax" /> <param name="noHead" value="true" /> <param name="mask" value="lower" /> - <output name="output" value="amaVit1_Gallus_gallus.psl" /> + <param name="filter" value="yes" /> + <param name="assembly_type" value="xeno" /> + <param name="assembly_category" value="well-ordered" /> + <output name="output_sorted" value="amaVit1_Gallus_gallus.psl" /> + <output name="output_filtered" value="amaVit1_Gallus_gallus_filtered.psl" /> </test> - </tests> + </tests> + <help> + <![CDATA[ +BLAT +==== +BLAT is a bioinformatics software tool which performs rapid mRNA/DNA and cross-species protein alignments. + +blat (version: v340) - Standalone blat sequence search command line tool. +--------------------------------------------------------- +usage: +++++++ + blat database query [-ooc=11.ooc] output.psl +where: + database and query are each either a .fa, .nib or .2bit file, + or a list of these files with one file name per line. + -ooc=11.ooc tells the program to load over-occurring 11-mers from + an external file. 
This will increase the speed + by a factor of 40 in many cases, but is not required. + output.psl is the name of the output file. +documentation: +++++++++++++++ +See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) +Source code: +++++++++++++ +http://hgdownload.cse.ucsc.edu/admin/exe/ +pslCDnaFilter (version: v340) +--------------------------- +Filter cDNA alignments in psl format. Filtering criteria are comparative, selecting near best in genome alignments for each given cDNA and non-comparative, based only on the quality of an individual alignment. +usage: +++++++ + pslCDnaFilter [options] inPsl outPsl +Source code: +++++++++++++ +http://hgdownload.cse.ucsc.edu/admin/exe/ + +Licence +======= +Please note that commercial download and installation of the Blat and In-Silico PCR software may be licensed through Kent Informatics (http://www.kentinformatics.com). +]]> +</help> +<citations> + <citation type="bibtex">@article{kent2002blat, + title={BLAT—the BLAST-like alignment tool}, + author={Kent, W James}, + journal={Genome research}, + volume={12}, + number={4}, + pages={656--664}, + year={2002}, + publisher={Cold Spring Harbor Lab} + }</citation> +</citations> </tool>
