# HG changeset patch # User iuc # Date 1547245538 18000 # Node ID 1cdc7f1dd60521dc13d4701f5e113e6184618798 # Parent 3790ec5643b40fa93901718bd001cb2fb0a7afc7 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59 diff -r 3790ec5643b4 -r 1cdc7f1dd605 gemini_macros.xml --- a/gemini_macros.xml Mon Dec 17 11:58:07 2018 -0500 +++ b/gemini_macros.xml Fri Jan 11 17:25:38 2019 -0500 @@ -21,9 +21,17 @@ + + + + 10.1371/journal.pcbi.1003153 + + + + @@ -33,31 +41,36 @@ - - - - - - - + + + + + - - + + + - + - - - + + + + - - - + + + + + + + @@ -66,27 +79,23 @@ - + - + + + - - - - - - - - - - - - - + + + + @@ -100,10 +109,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + value.strip() + + + + + + + + + + value.strip() + + + + + + + + + + + + + + + + + value.strip() + + + not value or value.isdigit() + + + not value or value.isdigit() + + + + @@ -116,67 +205,50 @@ #end if - - #if str($filter.filter_selector) == 'yes' and $filter.filter: - --filter '${ str( $filter.filter ) }' + + #if str($report.report_selector) == 'full': + #set cols = "*" + #else: + #if $report.columns and str($report.columns) != '': + #set $cols = str($report.columns) + #else + #set $cols = '' + #end if + #if str($report.extra_cols).strip(): + #if $cols: + #set $cols = $cols + ', ' + str($report.extra_cols) + #else: + #set $cols = str($report.extra_cols) + #end if + #end if + #if not $cols: + #set $cols = "variant_id, gene" + #end if #end if - #if $report.report_selector != 'all': - --columns "${report.columns} - #if str($report.extra_cols).strip() - #echo ','+','.join(str($report.extra_cols).split()) - #end if - " + @SET_COLS@ + #if $cols != "*" + --columns '$cols' #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.1371/journal.pcbi.1003153 - - - - - - - - - - value is not None and value.metadata.gemini_version == "@VERSION@" - - - + = %d" % int($r.start)) + #end if + #if 
str($r.stop).strip(): + #silent $r_elements.append("end <= %d" % int($r.stop)) + #end if + #silent $region_elements.append("(%s)" % " AND ".join($r_elements)) + #end for + ]]> + diff -r 3790ec5643b4 -r 1cdc7f1dd605 gemini_query.xml --- a/gemini_query.xml Mon Dec 17 11:58:07 2018 -0500 +++ b/gemini_query.xml Fri Jan 11 17:25:38 2019 -0500 @@ -3,6 +3,30 @@ gemini_macros.xml query + + + + + + + + + + + + + + + + + @@ -10,91 +34,251 @@ 0: + --min-kindreds ${i.min_kindreds} + #end if + ${i.in} + #set $multiline_sql_expr = str($i.sample_filter) + #set $cmdln_param = "--sample-filter" + @MULTILN_SQL_EXPR_TO_CMDLN@ + #end for - $show_samples - $show_families - $family_wise - $header - $dgidb - #if $region.strip(): - --region "${region}" + #if str($query.oformat.report.format) == 'with_samples': + #set $sample_delim = str($query.oformat.report.sample_delim) or ',' + --show-samples --sample-delim '$sample_delim' + #elif str($query.oformat.report.format) == 'with_samples_flattened': + --show-samples --format sampledetail + #elif str($query.oformat.report.format) == 'with_families': + #set $sample_delim = str($query.oformat.report.sample_delim) or ',' + --show-families --sample-delim '$sample_delim' + #elif str($query.oformat.report.format) == 'carrier_summary': + --carrier-summary-by-phenotype + #if str($query.oformat.report.phenotype).strip(): + '${query.oformat.report.phenotype}' + #else: + affected + #end if + #else: + --format ${query.oformat.report.format} #end if - #if int($min_kindreds) > 0: - --min-kindreds $min_kindreds + + #if str($query.interface) == 'basic': + ## build the SQL query string from its components + #if str($query.oformat.report.format) in ('vcf', 'tped'): + #set $cols = "*" + #else: + #set $report = $query.oformat.report.report + @SET_COLS@ + #end if + #set $q = "SELECT %s FROM variants" % $cols + #set $where_clause_elements = [] + #if str($query.filter).strip(): + #silent $where_clause_elements.append(str($query.filter).strip()) + #end if + + #set 
$regions = $query.regions + @PARSE_REGION_ELEMENTS@ + #if $region_elements: + #silent $where_clause_elements.append(" OR ".join($region_elements)) + #end if + #if $where_clause_elements: + #set $q = $q + " WHERE " + " AND ".join($where_clause_elements) + #end if + #if str($query.oformat.report.order_by).strip(): + #set $q = $q + " ORDER BY " + str($query.oformat.report.order_by).strip() + str($query.oformat.report.sort_order) + #end if + #else + ## The user entered the SQL query string directly. + #set $q = str($query.q) #end if - ##--format FORMAT Format of output (JSON, TPED or default) # we will take default for the time being - ## --sample-delim STRING The delimiter to be used with the --show-samples option. #set $multiline_sql_expr = $q #set $cmdln_param = "-q" @MULTILN_SQL_EXPR_TO_CMDLN@ - "${ infile }" - > "${ outfile }" + '$infile' + > '$outfile' ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + value.strip() + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
- + + + + + + - - + + + + @@ -106,10 +290,116 @@ `__ +in the GEMINI documentation. + +The tool supports regular genotype filters like:: + + gt.sample1 == HET and gt_depths.sample1 >= 15 + +, which would keep only variants for which sample1 is a heterozygous carrier +and for which the genomic position in sample1 is covered by at least 15 sequencing +reads, as well as GEMINI wildcard filters of the general form +*(COLUMN).(SAMPLE_FILTER).(RULE).(RULE_ENFORCEMENT)* like:: + + (gt_types).(phenotype==2).(!=HOM_REF).(all) + +, which keeps only variants for which no phenotypically affected sample is homozygous for the reference allele. + +*Sample filters* + +Sample filters have the same format as the second component of the genotype +wildcard filters above, so:: + + phenotype == 2 + +would filter for phenotypically affected samples. In this case, however, the +filter determines from which samples variants should be reported, i.e., here, +only variants found in phenotypically affected samples get analyzed. You can +use the ``--in`` filter to adjust the exact meaning of the sample filter. + +*Region filters* + +They let you restrict your analysis to parts of the genome, which can be useful +if you have prior knowledge of the approximate location of a variant of +interest. + +If you specify more than one region filter, they get combined with a logical +*OR*, meaning variants and genes falling in *any* of the regions are reported. -http://gemini.readthedocs.org/en/latest/content/querying.html +*Additional constraints on variants* + +These get translated directly into the WHERE clause of an SQL query and, thus, +have to be expressed in valid SQL syntax. As an example you could use:: + + is_exonic = 1 and impact_severity != 'LOW' + +to indicate that you are only interested in exonic variants that are not of +*LOW* impact severity, *i.e.*, not silent mutations. 
+ +Note that in SQL syntax, tests for equality use a single ``=``, while genotype +filters (discussed above) follow Python syntax and use ``==`` for the +same purpose. Also note that non-numerical values need to be enclosed in +single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be. + +----- + +*Building your query with the Advanced query constructor* + +For the sake of simplicity, the basic mode of the tool limits your queries to +the variants table of the underlying database. While this still allows many +useful queries to be formulated, it prevents you from joining information from +other tables (in particular, the gene_detailed table) or from querying a different +table directly. + +In advanced mode, you take responsibility for formulating the complete SQL +query in correct syntax, which allows you to do anything you could do with the +command line tool. Beyond querying other tables, this includes changing output +column names, deriving simple statistics on columns using the SQL Min, Max, +Count, Avg and Sum functions, and more. + +The price you pay for this extra flexibility is that you will have to make sure +that any other tool options you set are compatible with the result of your +particular query. For example, most output formats except the tabular default +output of GEMINI are incompatible with non-standard queries. Choosing +non-compatible options can result in them getting ignored silently, but also +in tool errors, or in problems with downstream tools. + +The chapter `Querying the GEMINI database +<http://gemini.readthedocs.org/en/latest/content/querying.html>`__ of the +GEMINI documentation can get you started with formulating your own queries. + +Note that genotype filters and sample filters cannot be expressed as genuine +SQL queries, so even the Advanced query constructor offers them. 
Region +filters and sort order of rows and columns, on the other hand, can be controlled +through SQL queries, like in this example:: + + SELECT gene, chrom, start, end, ref, alt FROM variants WHERE chrom = 'chr1' + AND start >= 10000000 and end <= 20000000 and is_lof = 1 ORDER BY chrom, + start + +, which would report all loss-of-function variants between 10,000,000 and +20,000,000 on chr1 and list the selected columns sorted on chromosome, then +position. + ]]> diff -r 3790ec5643b4 -r 1cdc7f1dd605 repository_dependencies.xml --- a/repository_dependencies.xml Mon Dec 17 11:58:07 2018 -0500 +++ b/repository_dependencies.xml Fri Jan 11 17:25:38 2019 -0500 @@ -1,4 +1,4 @@ - + \ No newline at end of file