# HG changeset patch # User greg # Date 1452781491 18000 # Node ID 311febbd33d66d50bafdde4a81744c8db7827dbb # Parent cff5b7c9be55f5b61a8a3f178a6f14ec88caf4da Uploaded diff -r cff5b7c9be55 -r 311febbd33d6 extract_genomic_dna.py --- a/extract_genomic_dna.py Thu Jan 14 07:55:22 2016 -0500 +++ b/extract_genomic_dna.py Thu Jan 14 09:24:51 2016 -0500 @@ -68,7 +68,7 @@ parser = argparse.ArgumentParser() parser.add_option('--input_format', dest='input_format', help="Input dataset format") parser.add_option('--input', dest='input', help="Input dataset") -parser.add_option('--dbkey', dest='dbkey', help="Input dataset genome build") +parser.add_option('--genome', dest='genome', help="Input dataset genome build") parser.add_option('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff") parser.add_option('--columns', dest='columns', help="Columns to use in input file") parser.add_option('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file") @@ -78,7 +78,7 @@ args = parser.parse_args() input_is_gff = args.input_format == 'gff' -interpret_features = args.interpret_features == "yes" +interpret_features = input_is_gff and args.interpret_features == "yes" if len(args.cols.split(',')) == 5: # Bed file. chrom_col, start_col, end_col, strand_col, name_col = parse_cols_arg(args.cols) @@ -166,14 +166,14 @@ try: sequence = nib.get(start, end - start) except Exception, e: - warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. " % (start, end - start, args.dbkey) + warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. 
" % (start, end - start, args.genome) warnings.append(warning) if not invalid_lines: invalid_lines = get_lines(feature) first_invalid_line = line_count skipped_lines += len(invalid_lines) continue - elif os.path.isfile(os.path.join(seq_dir, '%s.2bit' % args.dbkey)): + elif os.path.isfile(os.path.join(seq_dir, '%s.2bit' % args.genome)): if not(twobitfile): twobitfile = bx.seq.twobit.TwoBitFile(file(seq_path)) try: @@ -193,7 +193,7 @@ skipped_lines += len(invalid_lines) continue else: - warning = "Chromosome by name '%s' was not found for build '%s'. " % (chrom, args.dbkey) + warning = "Chromosome by name '%s' was not found for build '%s'. " % (chrom, args.genome) warnings.append(warning) if not invalid_lines: invalid_lines = get_lines(feature) @@ -201,7 +201,7 @@ skipped_lines += len(invalid_lines) continue if sequence == '': - warning = "Chrom: '%s', start: '%s', end: '%s' is either invalid or not present in build '%s'. " % (chrom, start, end, args.dbkey) + warning = "Chrom: '%s', start: '%d', end: '%d' is either invalid or not present in build '%s'. 
" % (chrom, start, end, args.genome) warnings.append(warning) if not invalid_lines: invalid_lines = get_lines(feature) @@ -215,7 +215,7 @@ c = 0 if input_is_gff: start, end = gff_util.convert_bed_coords_to_gff([start, end]) - fields = [args.dbkey, str(chrom), str(start), str(end), strand] + fields = [args.genome, str(chrom), str(start), str(end), strand] meta_data = "_".join(fields) if name.strip(): out.write(">%s %s\n" % (meta_data, name)) diff -r cff5b7c9be55 -r 311febbd33d6 extract_genomic_dna.xml --- a/extract_genomic_dna.xml Thu Jan 14 07:55:22 2016 -0500 +++ b/extract_genomic_dna.xml Thu Jan 14 09:24:51 2016 -0500 @@ -1,18 +1,20 @@ using coordinates from assembled/unassembled genomes + + faToTwoBit + - - - - - - - - - - - - - - - - - - - - + + + + + + + @@ -57,8 +46,8 @@ - - + + @@ -66,7 +55,7 @@ - + diff -r cff5b7c9be55 -r 311febbd33d6 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jan 14 09:24:51 2016 -0500 @@ -0,0 +1,6 @@ + + + dbkey, value + +

+ diff -r cff5b7c9be55 -r 311febbd33d6 twobit.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/twobit.loc.sample Thu Jan 14 09:24:51 2016 -0500 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that is used by some +#tools. The twobit.loc file has this format (white space characters +#are TAB characters): +# +#<Build> <FullPathToFile> +# +#So, for example, if you had droPer1 twobit files stored in +#/depot/data2/galaxy/droPer1/, then the twobit.loc entry +#would look like this: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +# +#and your /depot/data2/galaxy/droPer1/ directory would +#contain all of your twobit files (e.g.): +# +#-rw-rw-r-- 1 nate galaxy 48972650 2007-05-04 11:27 droPer1.2bit +#...etc... +# +#Your twobit.loc file should include an entry per line for each twobit +#file you have stored. For example: +# +#droPer1 /depot/data2/galaxy/droPer1/droPer1.2bit +#apiMel2 /depot/data2/galaxy/apiMel2/apiMel2.2bit +#droAna1 /depot/data2/galaxy/droAna1/droAna1.2bit +#droAna2 /depot/data2/galaxy/droAna2/droAna2.2bit +#...etc...