gffread: gffread.xml comparison

comparison gffread.xml @ 7:9c298cab341d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffread commit f40643d8b80299ebb84faebe92579321ac459746"

author	iuc
date	Sat, 25 Sep 2021 15:38:01 +0000
parents	bba49324f2fa
children

comparison

equal deleted inserted replaced

-:bba49324f2fa
+:9c298cab341d
-<tool id="gffread" name="gffread" version="@VERSION@.0">
+<tool id="gffread" name="gffread" version="@GALAXY_TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
 <description>Filters and/or converts GFF3/GTF2 records</description>
 <xrefs>
 <xref type="bio.tools">gffread</xref>
 </xrefs>
 <macros>
-<token name="@VERSION@">0.11.6</token>
+<!-- The version of this tool must not be lowered, since 2.x was used in the past.
+Let's use small increments and hope that gffread catches up one day. -->
+<token name="@GALAXY_TOOL_VERSION@">2.2.1.3</token>
+<token name="@TOOL_VERSION@">0.12.7</token>
+<token name="@VERSION_SUFFIX@">0</token>
 <xml name="fasta_output_select">
 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">
-<option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option>
+<option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w)</option>
-<option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x cds.fa)</option>
+<option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x)</option>
-<option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y pep.fa)</option>
+<option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y)</option>
 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option>
+<option value="-S">for protein fasta: use '*' instead of '.' as stop codon translation (-S)</option>
 </param>
 </xml>
 <xml name="ref_filtering_select">
 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">
 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option>
 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option>
 -->
 </param>
 </xml>
 <xml name="trackname">
-<param name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="(-t track_name}">
+<param argument="-t" name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="">
 <validator type="regex">\w+</validator>
 </param>
 </xml>
 <xml name="merge_opts">
 <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option>
 <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option>
-<option value="-d dupinfo">output collapsing info (-d dupinfo)</option>
+<option value="-d dupinfo">output collapsing info (-d)</option>
 </xml>
 <xml name="cluster_opts">
 <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option>
 <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option>
 </xml>
 <expand macro="cluster_opts" />
 </param>
 </xml>
 </macros>
 <requirements>
-<requirement type="package" version="@VERSION@">gffread</requirement>
+<requirement type="package" version="@TOOL_VERSION@">gffread</requirement>
 </requirements>
+<version_command>gffread --version</version_command>
 <command detect_errors="aggressive">
 <![CDATA[
 #if $reference_genome.source == 'history':
 ln -s '$reference_genome.genome_fasta' genomeref.fa &&
 #end if
 gffread '$input'
+#if $input.ext.startswith("bed")
+--in-bed
+#end if
 #if $reference_genome.source == 'cached':
 -g '${reference_genome.fasta_indexes.fields.path}'
 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '':
 #echo ' '.join(str($reference_genome.ref_filtering).split(','))
 #end if
 #if $reference_genome.fa_outputs and str($reference_genome.fa_outputs) != '':
 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(','))
 #end if
 #end if
 #if $gffs.gff_fmt != 'none':
-#if $gffs.tname:
+#if $gffs.gff_fmt != 'bed' and $gffs.tname:
 -t '$gffs.tname'
 #end if
 #if $gffs.gff_fmt == 'gff':
+## Only pass the Ensembl conversion flag when the input dataset is GTF.
+## The extension was previously misspelled as 'gft', so this branch never matched.
 #if $input.datatype.file_ext == 'gtf':
 $gffs.ensembl
 #end if
-$gffs.output_cmd
-#elif $gffs.gff_fmt == 'gtf':
-$gffs.output_cmd
 #end if
-#end if
+#if $gffs.gff_fmt == 'gtf'
+-T
+#elif $gffs.gff_fmt == 'bed'
+--bed
+#end if
+-o output.$gffs.gff_fmt
+#end if
+## Missing options
+##
+## --ids
+## --nids
+## -l
+## --jmatch
+## --nc
+## --ignore-locus
+## -A -s (see above)
+##  --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
+##  --sort-by : sort the reference sequences by the order in which their
+##       names are given in the <refseq.lst> file
+## Misc
+## --keep-exon-attrs : for -F option, do not attempt to reduce redundant
+## --attrs
+##      --keep-genes : in transcript-only mode (default), also preserve gene records
+##      --keep-comments: for GFF3 input/output, try to preserve comments
+## -B (see above)
+## -P
+##      --add-hasCDS : add a "hasCDS" attribute with value "true" for transcripts
+##           that have CDS features
+##      --adj-stop stop codon adjustment: enables -P and performs automatic
+##           adjustment of the CDS stop coordinate if premature or downstream
+##  --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
+##            features (see --tlf option below); automatic if the input
+##            filename ends with .tlf)
+##  --stream: fast processing of input GFF/BED transcripts as they are received
+##            (no sorting, exons must be grouped by transcript in the input data)
+## Clustering
+## -Y
+## Output
+## --gene2exon
+## --t-adopt
+## -j
+## --w-add
+## --w-nocds
 ]]>
 </command>
 <inputs>
-<param name="input" type="data" format="gff3,gtf" label="Input GFF3 or GTF feature file"/>
+<param name="input" type="data" format="bed,gff3,gtf" label="Input BED, GFF3 or GTF feature file"/>
 <!-- filtering -->
 <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters">
 <option value="-U">discard single-exon transcripts (-U)</option>
 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option>
 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option>
 <option value="none">No</option>
 <option value="filter">Yes</option>
 </param>
 <when value="none"/>
 <when value="filter">
-<param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
+<param argument="-r" name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
 <help><![CDATA[
-(-r [['strand']'chr':]'start'..'end') <br>
+[['strand']'chr':]'start'..'end' <br>
 examples: <br>
 1000..500000 <br>
 chr1:1000..500000 <br>
 +chr1:1000..500000 <br>
 -chr1:1000..500000
 ]]>
 </help>
 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator>
 </param>
-<param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
+<param argument="-R" name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
-label="Discard all transcripts that are not fully contained within the given range" help="(-R)"/>
+label="Discard all transcripts that are not fully contained within the given range" help=""/>
 </when>
 </conditional>
-<param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcipts with large introns"
+<param argument="-i" name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns"
-help="If set, discard transcripts having an intron larger (-i max_intron)"/>
+help="If set, discard transcripts having an intron larger than this value"/>
-<param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
+<param argument="-m" name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
-<help><![CDATA[(-m chr_replace) <br>
+<help><![CDATA[
 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID"  "new_ref_ID"<br>
 It is useful for switching between Ensembl and UCSC naming conventions <br>
 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out
 ]]>
 </help>
 </param>
 -->
 <!-- merging -->
 <conditional name="merging">
-<param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)">
+<param name="merge_sel" type="select" label="Transcript merging" help="">
 <option value="none">none</option>
-<option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts</option>
+<option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts (--merge)</option>
-<option value="cluster">cluster-only: merge but without collapsing matching transcripts</option>
+<option value="cluster">cluster-only: merge but without collapsing matching transcripts (--cluster-only)</option>
 </param>
 <when value="none"/>
 <when value="merge">
 <param name="merge_cmd" type="hidden" value="--merge"/>
 <expand macro="merge_opt_sel" />
 </when>
 </conditional>
 <!-- reference sequence file -->
 <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M -->
 <conditional name="reference_genome">
-<param name="source" type="select" label="Reference Genome" help="(-g genome.fasta) NOTE: Required for fasta outputs">
+<param name="source" type="select" label="Reference Genome" help="NOTE: Required for fasta outputs">
 <option value="none">none</option>
 <option value="cached"></option>
 <option value="history">From your history</option>
 </param>
 <when value="none">
 </when>
 <when value="cached">
-<param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+<param argument="-g" name="fasta_indexes" type="select" label="Source FASTA Sequence">
 <options from_data_table="all_fasta"/>
 </param>
 <expand macro="ref_filtering_select" />
 <expand macro="fasta_output_select" />
 </when>
 <when value="history">
-<param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
+<param argument="-g" name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
 <expand macro="ref_filtering_select" />
 <expand macro="fasta_output_select" />
 </when>
 </conditional>
 <conditional name="gffs">
 <!-- Output format selector; the command section uses the selected value as the extension of the -o output file. -->
 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff|output.gtf|output.bed)">
 <option value="none">none</option>
 <option value="gff">GFF</option>
 <option value="gtf">GTF</option>
+<option value="bed">BED</option>
 </param>
 <when value="none">
 </when>
 <when value="gff">
-<param name="output_cmd" type="hidden" value="-o output.gff3"/>
+<param argument="-L" name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help=""/>
-<param name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/>
 <expand macro="trackname" />
 </when>
 <when value="gtf">
-<param name="output_cmd" type="hidden" value="-T -o output.gtf"/>
 <expand macro="trackname" />
 </when>
+<when value="bed">
+</when>
 </conditional>
-<param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
+<param argument="-F" name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
-label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/>
+label="full GFF attribute preservation (all attributes are shown)" help=""/>
-<param name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
+<param argument="-D" name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
-label="decode url encoded characters within attributes" help="(-D)"/>
+label="decode url encoded characters within attributes" help=""/>
-<param name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
+<param argument="-E" name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
-label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/>
+label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help=""/>
 </inputs>
 <outputs>
-<data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3">
+<data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff">
 <filter>gffs['gff_fmt'] == 'gff'</filter>
 </data>
 <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf">
 <filter>gffs['gff_fmt'] == 'gtf'</filter>
+</data>
+<data name="output_bed" format="bed" metadata_source="input" label="${tool.name} on ${on_string}: bed" from_work_dir="output.bed">
+<filter>gffs['gff_fmt'] == 'bed'</filter>
 </data>
 <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa">
 <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter>
 </data>
 <data name="output_cds" format="fasta"  label="${tool.name} on ${on_string}: cds.fa" from_work_dir="cds.fa">
 <data name="output_dupinfo" format="txt" label="${tool.name} on ${on_string}: dupinfo" from_work_dir="dupinfo">
 <filter>'merge_options' in merging and merging['merge_options'].find('dupinfo') > 0</filter>
 </data>
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="1">
 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
 <param name="gff_fmt" value="gff"/>
-<output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="2" />
+<output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
 </test>
-<test>
+<test expect_num_outputs="1">
+<param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
+<param name="gff_fmt" value="gff"/>
+<output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
+</test>
+<test expect_num_outputs="1">
 <param name="input" ftype="gtf" value="ecoli-k12.gff3"/>
 <param name="gff_fmt" value="gff"/>
 <param name="full_gff_attribute_preservation" value="-F"/>
-<output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="2" />
+<output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="4" />
 </test>
-<test>
+<!-- bed output -->
-<param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
+<test expect_num_outputs="1">
-<param name="filtering" value="--no-pseudo"/>
+<param name="input" ftype="gff3" value="Homo_sapiens.GRCh37_19.71.gff3"/>
-<param name="gff_fmt" value="gtf"/>
+<param name="gff_fmt" value="bed"/>
-<output name="output_gtf">
+<output name="output_bed" ftype="bed">
 <assert_contents>
-<not_has_text text="pseudo" />
+<has_n_lines n="42"/>
-</assert_contents>
+<has_n_columns n="13"/>
-</output>
+</assert_contents>
-</test>
+</output>
-<test>
+</test>
+<!-- bed input and test tname -->
+<test expect_num_outputs="1">
+<param name="input" ftype="bed" value="Homo_sapiens.GRCh37_19.71.bed"/>
+<param name="gff_fmt" value="gff"/>
+<param name="tname" value="track name"/>
+<!-- gff_fmt is 'gff', so the dataset produced is output_gff; output_bed is filtered out unless gff_fmt is 'bed' -->
+<output name="output_gff" ftype="gff3">
+<assert_contents>
+<has_n_lines n="388"/>
+<!-- this will work with https://github.com/galaxyproject/galaxy/pull/12528 -->
+<!-- <has_n_columns n="9" comment="#"/> -->
+<has_text text="track name"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="1">
 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
 <param name="region_filter" value="filter"/>
 <param name="range" value="19:496500..504965"/>
 <param name="gff_fmt" value="gtf"/>
 <output name="output_gtf">
 <has_text text="ENST00000587541" />
 <has_text text="ENST00000382683" />
 </assert_contents>
 </output>
 </test>
-<test>
+<test expect_num_outputs="1">
 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
 <param name="region_filter" value="filter"/>
 <param name="range" value="19:496500..504965"/>
 <param name="discard_partial" value="true"/>
 <param name="gff_fmt" value="gtf"/>
 <not_has_text text="ENST00000587541" />
 <has_text text="ENST00000382683" />
 </assert_contents>
 </output>
 </test>
-<test>
+<test expect_num_outputs="1">
 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
 <param name="filtering" value="-C"/>
 <param name="region_filter" value="filter"/>
 <param name="range" value="19:496500..504965"/>
 <param name="gff_fmt" value="gtf"/>
 <not_has_text text="ENST00000587541" />
 <has_text text="ENST00000382683" />
 </assert_contents>
 </output>
 </test>
-<test>
+<test expect_num_outputs="4">
 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
 <param name="source" value="history"/>
 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/>
 <param name="region_filter" value="filter"/>
 <has_text text="ENST00000346144" />
 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" />
 </assert_contents>
 </output>
 </test>
+<test expect_num_outputs="1">
+<param name="input" ftype="gtf" value="stop_codons.gtf"/>
+<param name="source" value="history"/>
+<param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
+<param name="fa_outputs" value="-y pep.fa,-S"/>
+<output name="output_pep">
+<assert_contents>
+<has_text text="ENST00000269812" />
+<has_text text="PLRGLHPRV*LQTPLERCPCWPPAGGTGGCPHCLLHLRLLQSPTPTALSEGGGAGTEAQPVTDVDPGRG*" />
+</assert_contents>
+</output>
+</test>
 </tests>
 <help>
 <![CDATA[
 **gffread   Filters and/or converts GFF3/GTF2 records**
 The gffread command is documented with the stringtie_ package.
 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+gffread v0.12.7. Usage: ::
-gffread v0.11.4. Usage: ::
+gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>]
-gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>]
+[-o <outfile>] [-t <trackname>] [-r [<strand>]<chr>:<start>-<end> [-R]]
-[-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]
+[--jmatch <chr>:<start>-<end>] [--no-pseudo]
 [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]
-[-i <maxintron>] [--bed] [--table <attrlist>] [--sort-by <refseq_list.txt>]
+[-j ][--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]
+[--stream] [--bed | --gtf | --tlf] [--table <attrlist>] [--sort-by <ref.lst>]
+[<input_gff>]
 Filter, convert or cluster GFF/GTF/BED records, extract the sequence of
 transcripts (exon or CDS) and more.
 By default (i.e. without -O) only transcripts are processed, discarding any
 other non-transcript features. Default output is a simplified GFF3 with only
 the basic attributes.
-<input_gff> is a GFF file, use '-' for stdin
 Options:
+--ids discard records/transcripts if their IDs are not listed in <IDs.lst>
+--nids discard records/transcripts if their IDs are listed in <IDs.lst>
 -i   discard transcripts having an intron larger than <maxintron>
 -l   discard transcripts shorter than <minlen> bases
 -r   only show transcripts overlapping coordinate range <start>..<end>
 (on chromosome/contig <chr>, strand <strand> if provided)
 -R   for -r option, discard all transcripts that are not fully
 contained within the given range
+--jmatch only output transcripts matching the given junction
 -U   discard single-exon transcripts
 -C   coding only: discard mRNAs that have no CDS features
 --nc non-coding only: discard mRNAs that have CDS features
 --ignore-locus : discard locus features and attributes found in the input
 -A   use the description field from <seq_info.fsize> and add it
 as the value for a 'descr' attribute to the GFF record
 -s   <seq_info.fsize> is a tab-delimited file providing this info
 for each of the mapped sequences:
 <seq-name> <seq-length> <seq-description>
 (useful for -A option with mRNA/EST/protein mappings)
+Sorting: (by default, chromosomes are kept in the order they were found)
-Sorting: (by default, chromosomes are kept in the order they were found)
 --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
 --sort-by : sort the reference sequences by the order in which their
 names are given in the <refseq.lst> file
 Misc options:
--F   preserve all GFF attributes (for non-exon features)
+-F   keep all GFF attributes (for non-exon features)
 --keep-exon-attrs : for -F option, do not attempt to reduce redundant
 exon/CDS attributes
 -G   do not keep exon attributes, move them to the transcript feature
 (for GFF3 output)
+--attrs <attr-list> only output the GTF/GFF attributes listed in <attr-list>
+which is a comma delimited list of attribute names to keep
 --keep-genes : in transcript-only mode (default), also preserve gene records
 --keep-comments: for GFF3 input/output, try to preserve comments
 -O   process other non-transcript GFF records (by default non-transcript
 records are ignored)
 -V   discard any mRNAs with CDS having in-frame stop codons (requires -g)
 --in-bed: input should be parsed as BED format (automatic if the input
 filename ends with .bed*)
 --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
 features (see --tlf option below); automatic if the input
 filename ends with .tlf)
+--stream: fast processing of input GFF/BED transcripts as they are received
+(no sorting, exons must be grouped by transcript in the input data)
 Clustering:
 -M/--merge : cluster the input transcripts into loci, discarding
-"duplicated" transcripts (those with the same exact introns
+"redundant" transcripts (those with the same exact introns
 and fully contained or equal boundaries)
 -d <dupinfo> : for -M option, write duplication info to file <dupinfo>
 --cluster-only: same as -M/--merge but without discarding any of the
 "duplicate" transcripts, only create "locus" features
 -K   for -M option: also discard as redundant the shorter, fully contained
 -Q   for -M option, no longer require boundary containment when assessing
 redundancy (can be combined with -K); only introns have to match for
 multi-exon transcripts, and >=80% overlap for single-exon transcripts
 -Y   for -M option, enforce -Q but also discard overlapping single-exon
 transcripts, even on the opposite strand (can be combined with -K)
 Output options:
 --force-exons: make sure that the lowest level GFF features are considered
 "exon" features
 --gene2exon: for single-line genes not parenting any transcripts, add an
 exon feature spanning the entire gene (treat it as a transcript)
 -D    decode url encoded characters within attributes
 -Z    merge very close exons into a single exon (when intron size<4)
 -g   full path to a multi-fasta file with the genomic sequences
 for all input mappings, OR a directory with single-fasta files
 (one per genomic sequence, with file names matching sequence names)
--w    write a fasta file with spliced exons for each GFF transcript
+-j    output the junctions and the corresponding transcripts
+-w    write a fasta file with spliced exons for each transcript
+--w-add <N> for the -w option, extract additional <N> bases
+both upstream and downstream of the transcript boundaries
+--w-nocds for -w, disable the output of CDS info in the FASTA file
 -x    write a fasta file with spliced CDS for each GFF transcript
 -y    write a protein fasta file with the translation of CDS for each record
--W    for -w and -x options, write in the FASTA defline the exon
+-W    for -w, -x and -y options, write in the FASTA defline all the exon
 coordinates projected onto the spliced sequence;
-for -y option, write transcript attributes in the FASTA defline
 -S    for -y option, use '*' instead of '.' as stop codon translation
--L    Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)
+-L    Ensembl GTF to GFF3 conversion, adds version to IDs
 -m    <chr_replace> is a name mapping table for converting reference
 sequence names, having this 2-column format:
 <original_ref_ID> <new_ref_ID>
-WARNING: all GFF records on reference sequences whose original IDs
-are not found in the 1st column of this table will be discarded!
 -t    use <trackname> in the 2nd column of each GFF/GTF output line
--o    write the records into <outfile> instead of stdout
+-o    write the output records into <outfile> instead of stdout
 -T    main output will be GTF instead of GFF3
 --bed output records in BED format instead of default GFF3
 --tlf output "transcript line format" which is like GFF
-but exons, CDS features and related data are stored as GFF
+but with exons and CDS related features stored as GFF
 attributes in the transcript feature line, like this:
 exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>
 <exons> is a comma-delimited list of exon_start-exon_end coordinates;
 <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>
 --table output a simple tab delimited format instead of GFF, with columns
 having the values of GFF attributes given in <attrlist>; special
 pseudo-attributes (prefixed by @) are recognized:
-@chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen
+@id, @geneid, @chr, @start, @end, @strand, @numexons, @exons,
+@cds, @covlen, @cdslen
+If any of -w/-y/-x FASTA output files are enabled, the same fields
+(excluding @id) are appended to the definition line of corresponding
+FASTA records
 -v,-E expose (warn about) duplicate transcript IDs and other potential
 problems with the given GFF/GTF records
 ]]>
 </help>
 <citations>
 <citation type="doi">10.1038/nbt.1621</citation>
 </citations>

Mercurial > repos > devteam > gffread

comparison gffread.xml @ 7:9c298cab341d draft default tip