Mercurial > repos > galaxyp > validate_fasta_database
comparison validate_fasta_database.xml @ 0:8eb277262715 draft
planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
| author | galaxyp |
|---|---|
| date | Thu, 14 Sep 2017 16:14:56 -0400 |
| parents | |
| children | 5f6657ae8e81 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8eb277262715 |
|---|---|
| 1 <tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.4"> | |
| 2 <!-- Galaxy wrapper around the validate-fasta-database command: reads one FASTA dataset and writes one output for sequences that passed and one for sequences that failed. --> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.0">validate-fasta-database</requirement> | |
| 5 </requirements> | |
| 6 <stdio> | |
| 7 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/> | |
| 8 </stdio> | |
| 9 <!-- Arguments are positional: input, passed output, failed output, then the five option values in the order written below. --> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 validate-fasta-database | |
| 12 '$inFasta' | |
| 13 '$goodFastaOut' | |
| 14 '$badFastaOut' | |
| 15 '$crashIfInvalid' | |
| 16 '$checkIsProtein' | |
| 17 '$customLetters' | |
| 18 '$checkHasAccession' | |
| 19 '$minimumLength' | |
| 20 ]]></command> | |
| 21 <inputs> | |
| 22 <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/> | |
| 23 <!-- Boolean defaults are declared with "checked"; Galaxy does not take a boolean's default from "value". --> | |
| 24 <param type="boolean" name="crashIfInvalid" | |
| 25 label="Fail job if invalid FASTA headers detected?" | |
| 26 checked="false"/> | |
| 27 <param type="boolean" name="checkIsProtein" | |
| 28 label="Ensure that sequence is a valid amino acid sequence?" | |
| 29 help="Checks that sequence only contains the 20 standard amino acids (and optional non-standard AAs), and checks that it is not DNA or RNA" | |
| 30 checked="true"/> | |
| 31 <param type="text" name="customLetters" value="" | |
| 32 label="Optional: add one letter codes for any non-standard amino acids that you are using. " | |
| 33 help="Anything that is not an upper case letter [A-Z] will be ignored."/> | |
| 34 <param type="boolean" name="checkHasAccession" | |
| 35 label="Only pass sequences with accession numbers?" | |
| 36 checked="false"/> | |
| 37 <param type="integer" name="minimumLength" | |
| 38 label="Minimum length for sequences to pass" | |
| 39 value="0"/> | |
| 40 </inputs> | |
| 41 <outputs> | |
| 42 <data name="goodFastaOut" format="fasta" label="Validate FASTA ${on_string}: passed"/> | |
| 43 <data name="badFastaOut" format="fasta" label="Validate FASTA ${on_string}: failed"/> | |
| 44 </outputs> | |
| 45 <tests> | |
| 46 <!-- test general filtering; checkIsProtein is set explicitly so the expected files do not depend on the form default --> | |
| 47 <test> | |
| 48 <param name="inFasta" value="fastaFilteringTest_IN.fasta"/> | |
| 49 <param name="checkIsProtein" value="false"/> | |
| 50 <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" /> | |
| 51 <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" /> | |
| 52 </test> | |
| 53 | |
| 54 <!--test filtering out genetic sequences and bad protein sequences--> | |
| 55 <test> | |
| 56 <param name="inFasta" value="geneticFiltering.in"/> | |
| 57 <param name="checkIsProtein" value="true"/> | |
| 58 <output name="goodFastaOut" file="geneticFilteringGood.out"/> | |
| 59 <output name="badFastaOut" file="geneticFilteringBad.out"/> | |
| 60 </test> | |
| 61 | |
| 62 <!-- test the minimum-length filter; checkIsProtein is set explicitly so the expected files do not depend on the form default --> | |
| 63 <test> | |
| 64 <param name="inFasta" value="length5Filtering.in"/> | |
| 65 <param name="checkIsProtein" value="false"/> | |
| 66 <param name="minimumLength" value="5"/> | |
| 67 <output name="goodFastaOut" file="length5FilteringGood.out"/> | |
| 68 <output name="badFastaOut" file="length5FilteringBad.out"/> | |
| 69 </test> | |
| 70 </tests> | |
| 71 <help> | |
| 72 | |
| 73 <![CDATA[ | |
| 74 **Notes** | |
| 75 | |
| 76 Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema. | |
| 77 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash. | |
| 78 | |
| 79 **Output** | |
| 80 | |
| 81 The main output of this tool, whose label ends in ": passed", is a FASTA database that can be run through SearchGUI without error. | |
| 82 The second output, whose label ends in ": failed", holds the failed sequences, which may be examined for typos and other errors. | |
| 83 | |
| 84 In addition, the tool will print the databases assigned by the Compomics utility (i.e., UniProt), for a quick check of the validity of the custom FASTA database. | |
| 85 | |
| 86 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats: | |
| 87 | |
| 88 * UniProt | |
| 89 * SwissProt (starts with ">sw|" or ">SW|") | |
| 90 * NCBI (starts with ">gi|" or ">GI|") | |
| 91 * Halobacterium from Max Planck (starts with "OE") | |
| 92 * H Influenza, from Novartis (starts with ">hflu_") | |
| 93 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_") | |
| 94 * M Tuberculosis (starts with ">M. tub") | |
| 95 * Saccharomyces Genome Database (contains "SGDID") | |
| 96 * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]") | |
| 97 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA") | |
| 98 * UPS (contains "\_HUMAN\_UPS") | |
| 99 | |
| 100 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number. | |
| 101 ]]> | |
| 102 </help> | |
| 103 <citations> | |
| 104 <citation type="bibtex"> | |
| 105 @misc{fastaValidationTool, | |
| 106 author = {The GalaxyP Team}, | |
| 107 date = {22 June 2017}, | |
| 108 title = {FASTA Database Validation Tool} | |
| 109 } | |
| 110 </citation> | |
| 111 </citations> | |
| 112 </tool> | |
