# HG changeset patch # User greg # Date 1485547395 18000 # Node ID a63b610ec5bd7972c7c3604c57281569736075b6 # Parent f041f777854023356b5df4bd09858082e3d19f60 Uploaded diff -r f041f7778540 -r a63b610ec5bd gene_family_classifier.xml --- a/gene_family_classifier.xml Tue Jan 24 10:32:56 2017 -0500 +++ b/gene_family_classifier.xml Fri Jan 27 15:03:15 2017 -0500 @@ -1,7 +1,7 @@ - - pipeline + + into precomputed orthologous gene family clusters - plant_tribes_gene_family_classifier + plant_tribes_gene_family_classifier @@ -71,9 +71,9 @@ #if $create_ortho_sequences: #if $create_corresponding_coding_sequences: - && echo "Sequences classified into pre-computed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output + && echo "Sequences classified into precomputed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output #else: - && echo "Sequences classified into pre-computed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output + && echo "Sequences classified into precomputed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output #end if && ls -al $orthogroups_fasta_src_dir | grep f >> $output && mv $orthogroups_fasta_src_dir/* $dest_dir || true @@ -119,7 +119,7 @@ - + @@ -182,18 +182,24 @@ This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of -complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs gene family -classification of the post processed de novo transcripts using either blastp (faster), HMMScan (slower but more sensitive to remote -homologs) or both (more exhaustive). 
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene +sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive +to remote homologs) or both (more exhaustive). + +This tool accepts any of the following as input. + +* the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool +* externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly +* gene predictions from a sequenced genome ----- **Options** * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. - * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL. - * **Protein classification method** - One of blastp, HMMScan or both. - * **SuperOrthogroups MCL clustering** - blastp e-value matrix between all pairs of orthogroups. + * **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters). + * **Protein classification method** - blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive). + * **Super Orthogroups** - Secondary MCL clusters of orthogroups. * **Minumum single copy taxa required in orthogroup** - Used with "Minumum single copy taxa required in orthogroup" configuration only. * **Minumum taxa required in single copy orthogroup** - Used with "Minumum single copy taxa required in orthogroup" configuration only. * **Corresponding coding sequences (CDS) fasta file** - Used only when selecting "Create orthogroup fasta files?". @@ -202,7 +208,7 @@ @unpublished{None, - author = {None}, + author = {Eric Wafula}, title = {None}, year = {None}, eprint = {None},