Mercurial > repos > proteore > proteore_id_converter
diff id_converter.xml @ 0:c83e45dab5c9 draft
planemo upload commit fa7e64b910f0f92c5f3e160536ace963c7ab0cba-dirty
| author | proteore |
|---|---|
| date | Tue, 21 Aug 2018 09:06:21 -0400 |
| parents | |
| children | e4d9a4ab1930 |
line wrap: on
line diff
<!--
    Galaxy tool wrapper "ID Converter".

    Builds a command line for id_converter_UniProt.R (shipped in the tool
    directory) from the form below: the identifiers to convert (pasted text or
    one column of a dataset), the source identifier type, one or more target
    identifier types and a reference mapping file looked up in the
    "id_mapping_file" tool data table. A single tabular dataset is produced.
-->
<tool id="IDconverter" name="ID Converter" version="0.1.0">
    <description>convert public database identifiers</description>
    <requirements>
        <requirement type="package" version="3.4.1">R</requirement>
    </requirements>
    <!-- Any non-zero exit code of the script marks the job as failed. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        The interpreter is invoked explicitly (the "interpreter" attribute is
        deprecated) and every substituted value is wrapped in single quotes.
        The "txt" sanitizer below lets all printable characters through except
        the single quote, so single quoting is the only form in which that
        value cannot be expanded or terminated by the shell.
    -->
    <command><![CDATA[
        Rscript '$__tool_directory__/id_converter_UniProt.R'
        --id_type='$species.idtypein'
        #if $input.ids == "text"
            --input='$input.txt'
            --input_type='list'
        #else
            --input='$input.file'
            --column_number='$input.ncol'
            --header='$input.header'
            --input_type='file'
        #end if
        --target_ids='$species.idto.idtypeout'
        --output='$output'
        ## Pick the data table row whose first field equals the selected value
        ## and pass its last field. list() keeps the subscript valid when
        ## filter() returns an iterator instead of a list.
        --ref_file='${ list( filter( lambda x: str( x[0] ) == str( $species.mapping_file ), $__app__.tool_data_tables['id_mapping_file'].get_fields() ) )[0][-1] }'
    ]]></command>
    <inputs>
        <!-- Where the identifiers come from: pasted text or a dataset column. -->
        <conditional name="input">
            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)">
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing your identifiers</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258'>
                    <!--
                        Single quotes become __sq__; space, line feed, carriage
                        return and tab are dropped. The whitespace characters
                        are written as character references because literal
                        line breaks and tabs inside an attribute value are
                        normalised to a space by the XML parser.
                        NOTE(review): the two line-break entries are assumed to
                        be LF and CR; confirm against the repository copy.
                    -->
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="'"/>
                        </valid>
                        <mapping initial="none">
                            <add source="'" target="__sq__"/>
                            <add source="&#x20;" target=""/>
                            <add source="&#xA;" target=""/>
                            <add source="&#xD;" target=""/>
                            <add source="&#x9;" target=""/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
            </when>
        </conditional>
        <!-- Species selection; each species offers its own source/target ID types. -->
        <conditional name="species">
            <param name="mapping_file" type="select" label="Select species for ID conversion">
                <options from_data_table="id_mapping_file"/>
                <!-- NOTE(review): unlabeled static options next to the data table
                     options; presumably declared so that the <when> values below
                     are known values of this select. Confirm before removing. -->
                <option value="human_id_mapping"></option>
                <option value="mouse_id_mapping"></option>
            </param>
            <when value="human_id_mapping">
                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section">
                    <option value="neXtProt">neXtProt ID (e.g. NX_P31946)</option>
                    <option value="UniProt.AC" selected="true">Uniprot accession number (e.g. P31946)</option>
                    <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                    <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                    <option value="RefSeq">RefSeq (NCBI) protein (e.g. NP_003395.1)</option>
                    <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                    <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                    <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                    <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                    <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                    <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                    <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                    <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                    <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
                    <option value="BioGrid">BioGrid (e.g. 113361)</option>
                    <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                    <option value="KEGG">KEGG (e.g. hsa:7529)</option>
                </param>
                <section name="idto" title="Target type of IDs" expanded="true">
                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" help="Please see example of IDs in help section">
                        <option value="neXtProt">neXtProt ID (e.g. NX_P31946)</option>
                        <option value="UniProt.AC">Uniprot accession number (e.g. P31946)</option>
                        <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                        <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                        <option value="RefSeq">RefSeq (NCBI) protein (e.g. NP_003395.1)</option>
                        <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                        <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                        <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                        <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                        <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                        <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                        <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                        <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                        <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
                        <option value="BioGrid">BioGrid (e.g. 113361)</option>
                        <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                        <option value="KEGG">KEGG (e.g. hsa:7529)</option>
                    </param>
                </section>
            </when>
            <!-- Same choices as for human, minus neXtProt. -->
            <when value="mouse_id_mapping">
                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section">
                    <option value="UniProt.AC" selected="true">Uniprot accession number (e.g. P31946)</option>
                    <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                    <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                    <option value="RefSeq">RefSeq (NCBI) protein (e.g. NP_003395.1)</option>
                    <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                    <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                    <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                    <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                    <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                    <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                    <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                    <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                    <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
                    <option value="BioGrid">BioGrid (e.g. 113361)</option>
                    <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                    <option value="KEGG">KEGG (e.g. hsa:7529)</option>
                </param>
                <section name="idto" title="Target type of IDs" expanded="true">
                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" help="Please see example of IDs in help section">
                        <option value="UniProt.AC">Uniprot accession number (e.g. P31946)</option>
                        <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                        <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                        <option value="RefSeq">RefSeq (NCBI) protein (e.g. NP_003395.1)</option>
                        <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                        <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                        <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                        <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                        <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                        <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                        <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                        <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                        <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
                        <option value="BioGrid">BioGrid (e.g. 113361)</option>
                        <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                        <option value="KEGG">KEGG (e.g. hsa:7529)</option>
                    </param>
                </section>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="tabular" />
    </outputs>
    <tests>
        <!-- File input, human mapping, UniProt accession to five target types. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt" />
                <param name="header" value="true" />
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="species">
                <param name="mapping_file" value="human_id_mapping"/>
                <param name="idtypein" value="UniProt.AC"/>
                <section name="idto">
                    <param name="idtypeout" value="neXtProt,UniProt.ID,GeneID,MIM,Ensembl" />
                </section>
            </conditional>
            <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" />
        </test>
    </tests>
    <help><![CDATA[
This tool converts a list of IDs to another identifier type, select the source and target type from the dropdown menus above (see below supported source and target types).

After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to.

If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please, note that a "NA" is returned when there is no corresponding ID.

If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.

**Available databases**

* neXtProt ID (e.g. NX_P31946)

* Uniprot accession number (e.g. P31946)

* Uniprot ID (e.g 1433B_HUMAN)

* Entrez gene ID (e.g. 7529)

* RefSeq (NCBI) protein (e.g. NP_003395.1)

* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)

* Protein DataBank ID (e.g. 2BR9:A)

* GOterms (Gene Ontology) ID (e.g. GO:0070062)

* Protein Information Resource ID (e.g. S34755)

* OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)

* Unigene ID (e.g. Hs.643544)

* Ensembl gene ID (e.g. ENSG00000166913)

* Ensembl transcript ID (e.g. ENST00000353703)

* Ensembl protein ID (e.g. ENSP00000300161)

* BioGrid (e.g. 113361)

* STRING (e.g. 9606.ENSP00000300161)

* KEGG (e.g. hsa:7529)

-----

.. class:: infomark

This tool converts human IDs using the following source files:

* HUMAN_9606_idmapping_selected.tab (Uniprot 02/07/2018)
  Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/

* HUMAN_9606_idmapping.dat (Uniprot 02/07/18)
  Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/

* nextprot_ac_list_all.txt (Nextprot released on 17/01/2018)
  Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/

-----

.. class:: warningmark

Accession numbers with a hyphen ("-") that normally correspond to isoform are considered
(and will therefore be treated) as similar to its canonical form.

For example, "Q71U36-2" will be treated as "Q71U36".

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

    ]]></help>
    <citations>
    </citations>
</tool>
