Mercurial > repos > iuc > split_libraries_fastq
comparison generate_test_data.sh.orig @ 1:4ed38a03aac6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit ea40e3d84e7850eb4226d6c85f709dcad18d4ba9
| author | iuc |
|---|---|
| date | Fri, 19 May 2017 03:19:56 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 0:cdd2c0176c38 | 1:4ed38a03aac6 |
|---|---|
| 1 #!/usr/bin/env bash | |
| 2 | |
| 3 # validate_mapping_file | |
| 4 validate_mapping_file.py \ | |
| 5 -m 'test-data/validate_mapping_file/map.tsv' \ | |
| 6 -o validate_mapping_file_output \ | |
| 7 -c '_' | |
| 8 cp validate_mapping_file_output/*.html 'test-data/validate_mapping_file/map.tsv.html' | |
| 9 cp validate_mapping_file_output/*.log 'test-data/validate_mapping_file/map.tsv.log' | |
| 10 cp validate_mapping_file_output/*corrected.txt 'test-data/validate_mapping_file/map.tsv_corrected.txt' | |
| 11 rm -rf validate_mapping_file_output | |
| 12 | |
| 13 # split_libraries_fastq | |
| 14 split_libraries_fastq.py \ | |
| 15 --sequence_read_fps 'test-data/split_libraries_fastq/forward_reads.fastq' \ | |
| 16 -o split_libraries \ | |
| 17 --mapping_fps 'test-data/map.tsv' \ | |
| 18 --barcode_read_fps 'test-data/split_libraries_fastq/barcodes.fastq' \ | |
| 19 --store_qual_scores \ | |
| 20 --store_demultiplexed_fastq \ | |
| 21 --max_bad_run_length 3 \ | |
| 22 --min_per_read_length_fraction 0.75 \ | |
| 23 --sequence_max_n 0 \ | |
| 24 --start_seq_id 0 \ | |
| 25 --barcode_type 'golay_12' \ | |
| 26 --max_barcode_errors 1.5 | |
| 27 cp split_libraries/histograms.txt 'test-data/split_libraries_fastq/histograms.tabular' | |
| 28 cp split_libraries/seqs.fna 'test-data/split_libraries_fastq/sequences.fasta' | |
| 29 cp split_libraries/seqs.qual 'test-data/split_libraries_fastq/sequence_qualities.qual' | |
| 30 cp split_libraries/seqs.fastq 'test-data/split_libraries_fastq/demultiplexed_sequences.fastq' | |
| 31 rm -rf split_libraries | |
| 32 | |
| 33 # split_libraries | |
| 34 split_libraries.py \ | |
| 35 --map 'test-data/split_libraries/mapping_file.txt' \ | |
| 36 -o split_libraries \ | |
| 37 --fasta 'test-data/split_libraries/reads_1.fna,test-data/split_libraries/reads_2.fna' \ | |
| 38 --qual 'test-data/split_libraries/reads_1.qual,test-data/split_libraries/reads_2.qual' \ | |
| 39 --min_qual_score 25 \ | |
| 40 --qual_score_window 0 \ | |
| 41 --record_qual_scores \ | |
| 42 --min_seq_length 200 \ | |
| 43 --max_seq_length 1000 \ | |
| 44 --max_ambig 6 \ | |
| 45 --max_homopolymer 6 \ | |
| 46 --max_primer_mismatch 0 \ | |
| 47 --barcode_type 'golay_12' \ | |
| 48 --max_barcode_errors 1.5 \ | |
| 49 --start_numbering_at 1 | |
| 50 cp split_libraries/seqs.fna 'test-data/split_libraries/seqs.fna' | |
| 51 cp split_libraries/split_library_log.txt 'test-data/split_libraries/split_library_log' | |
| 52 cp split_libraries/histograms.txt 'test-data/split_libraries/histograms.txt' | |
| 53 cp split_libraries/seqs_filtered.qual 'test-data/split_libraries/seqs_filtered.qual' | |
| 54 rm -rf split_libraries | |
| 55 | |
| 56 # pick_open_reference_otus | |
| 57 pick_open_reference_otus.py \ | |
| 58 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \ | |
| 59 -o pick_open_reference_otus_1 \ | |
| 60 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \ | |
| 61 --otu_picking_method 'uclust' \ | |
| 62 --new_ref_set_id 'New' \ | |
| 63 --parallel \ | |
| 64 --percent_subsample '0.001' \ | |
| 65 --prefilter_percent_id '0.0' \ | |
| 66 --minimum_failure_threshold '100000' \ | |
| 67 --min_otu_size '2' | |
| 68 cp pick_open_reference_otus_1/final_otu_map.txt 'test-data/pick_open_reference_otus/1_final_otu_map.txt' | |
| 69 cp pick_open_reference_otus_1/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/1_final_otu_map_mc.txt' | |
| 70 cp pick_open_reference_otus_1/rep_set.tre 'test-data/pick_open_reference_otus/1_rep_set_tree.tre' | |
| 71 rm -rf pick_open_reference_otus_1 | |
| 72 | |
| 73 pick_open_reference_otus.py \ | |
| 74 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \ | |
| 75 -o pick_open_reference_otus_2 \ | |
| 76 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \ | |
| 77 --otu_picking_method 'uclust' \ | |
| 78 --new_ref_set_id 'New' \ | |
| 79 --parallel \ | |
| 80 --percent_subsample '0.001' \ | |
| 81 --prefilter_percent_id '0.0' \ | |
| 82 --minimum_failure_threshold '100000' \ | |
| 83 --min_otu_size '3' \ | |
| 84 --suppress_taxonomy_assignment \ | |
| 85 --suppress_align_and_tree | |
| 86 cp pick_open_reference_otus_2/final_otu_map.txt 'test-data/pick_open_reference_otus/2_final_otu_map.txt' | |
| 87 cp pick_open_reference_otus_2/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/2_final_otu_map_mc.txt' | |
| 88 rm -rf pick_open_reference_otus_2 | |
| 89 | |
| 90 pick_open_reference_otus.py \ | |
| 91 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \ | |
| 92 -o pick_open_reference_otus_3 \ | |
| 93 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \ | |
| 94 --otu_picking_method 'uclust' \ | |
| 95 --new_ref_set_id 'New' \ | |
| 96 --parallel \ | |
| 97 --percent_subsample '0.001' \ | |
| 98 --prefilter_percent_id '0.0' \ | |
| 99 --minimum_failure_threshold '100000' \ | |
| 100 --min_otu_size '10' \ | |
| 101 --suppress_taxonomy_assignment | |
| 102 cp pick_open_reference_otus_3/final_otu_map.txt 'test-data/pick_open_reference_otus/3_final_otu_map.txt' | |
| 103 cp pick_open_reference_otus_3/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/3_final_otu_map_mc.txt' | |
| 104 cp pick_open_reference_otus_3/rep_set.tre 'test-data/pick_open_reference_otus/3_rep_set_tree.tre' | |
| 105 rm -rf pick_open_reference_otus_3 | |
| 106 | |
| 107 # core_diversity_analyses | |
| 108 # Data are from test data in https://github.com/biocore/qiime | |
| 109 core_diversity_analyses.py \ | |
| 110 --input_biom_fp 'test-data/core_diversity_analyses/otu_table.biom' \ | |
| 111 -o core_diversity_analyses_1 \ | |
| 112 --mapping_fp 'test-data/core_diversity_analyses/map.txt' \ | |
| 113 --sampling_depth 22 \ | |
| 114 --tree_fp 'test-data/core_diversity_analyses/rep_set.tre' | |
| 115 cp core_diversity_analyses_1/bdiv_even22/unweighted_unifrac_pc.txt 'test-data/core_diversity_analyses/unweighted_unifrac_pc.txt' | |
| 116 rm -rf core_diversity_analyses_1 | |
| 117 | |
| 118 core_diversity_analyses.py \ | |
| 119 --input_biom_fp 'test-data/core_diversity_analyses/otu_table.biom' \ | |
| 120 -o core_diversity_analyses_2 \ | |
| 121 --mapping_fp 'test-data/core_diversity_analyses/map.txt' \ | |
| 122 --sampling_depth 22 \ | |
| 123 --nonphylogenetic_diversity \ | |
| 124 --suppress_taxa_summary \ | |
| 125 --suppress_beta_diversity \ | |
| 126 --suppress_alpha_diversity \ | |
| 127 --suppress_group_significance | |
| 128 rm -rf core_diversity_analyses_2 | |
| 129 | |
| 130 # summarize_taxa | |
| 131 cp 'test-data/core_diversity_analyses/otu_table.biom' 'test-data/summarize_taxa/otu_table.biom' | |
| 132 cp 'test-data/core_diversity_analyses/map.txt' 'test-data/summarize_taxa/map.txt' | |
| 133 | |
| 134 summarize_taxa.py \ | |
| 135 -i 'test-data/summarize_taxa/otu_table.biom' \ | |
| 136 -o summarize_taxa_1 \ | |
| 137 -L '2,3,4,5,6' \ | |
| 138 -m 'test-data/summarize_taxa/map.txt' \ | |
| 139 --md_identifier "taxonomy" \ | |
| 140 --delimiter ";" | |
| 141 cp summarize_taxa_1/*_L2.txt "test-data/summarize_taxa/1_L2.txt" | |
| 142 cp summarize_taxa_1/*_L3.txt "test-data/summarize_taxa/1_L3.txt" | |
| 143 cp summarize_taxa_1/*_L4.txt "test-data/summarize_taxa/1_L4.txt" | |
| 144 cp summarize_taxa_1/*_L5.txt "test-data/summarize_taxa/1_L5.txt" | |
| 145 cp summarize_taxa_1/*_L6.txt "test-data/summarize_taxa/1_L6.txt" | |
| 146 rm -rf summarize_taxa_1 | |
| 147 | |
| 148 summarize_taxa.py \ | |
| 149 -i 'test-data/summarize_taxa/otu_table.biom' \ | |
| 150 -o summarize_taxa_2 \ | |
| 151 -L '3,6' \ | |
| 152 --md_identifier "taxonomy" \ | |
| 153 --delimiter ";" | |
| 154 cp summarize_taxa_2/*_L3.txt "test-data/summarize_taxa/2_L3.txt" | |
| 155 cp summarize_taxa_2/*_L6.txt "test-data/summarize_taxa/2_L6.txt" | |
| 156 rm -rf summarize_taxa_2 | |
| 157 | |
| 158 # make_emperor | |
| 159 cp 'test-data/core_diversity_analyses/unweighted_unifrac_pc.txt' 'test-data/make_emperor/unweighted_unifrac_pc.txt' | |
| 160 cp 'test-data/core_diversity_analyses/map.txt' 'test-data/make_emperor/map.txt' | |
| 161 cp 'test-data/summarize_taxa/2_L3.txt' 'test-data/make_emperor/2_L3.txt' | |
| 162 | |
| 163 make_emperor.py \ | |
| 164 --input_coords 'test-data/make_emperor/unweighted_unifrac_pc.txt' \ | |
| 165 -o make_emperor_1 \ | |
| 166 --map_fp 'test-data/make_emperor/map.txt' \ | |
| 167 --number_of_axes '10' \ | |
| 168 --add_unique_columns \ | |
| 169 --number_of_segments 8 | |
| 170 rm -rf make_emperor_1 | |
| 171 | |
| 172 make_emperor.py \ | |
| 173 --input_coords 'test-data/make_emperor/unweighted_unifrac_pc.txt' \ | |
| 174 -o make_emperor_2 \ | |
| 175 --map_fp 'test-data/make_emperor/map.txt' \ | |
| 176 --number_of_axes '10' \ | |
| 177 --add_unique_columns \ | |
| 178 --number_of_segments 8 \ | |
| 179 --taxa_fp 'test-data/make_emperor/2_L3.txt' \ | |
| 180 --n_taxa_to_keep 10 | |
| 181 rm -rf make_emperor_2 | |
| 182 | |
| 183 # alpha_rarefaction | |
| 184 alpha_rarefaction.py \ | |
| 185 --otu_table_fp "test-data/alpha_rarefaction/otu_table.biom" \ | |
| 186 --mapping_fp "test-data/alpha_rarefaction/mapping_file.txt" \ | |
| 187 -o alpha_rarefaction \ | |
| 188 --num_steps '2' \ | |
| 189 --tree_fp "test-data/alpha_rarefaction/rep_set.tre" \ | |
| 190 --min_rare_depth '10' \ | |
| 191 --max_rare_depth '50' \ | |
| 192 --retain_intermediate_files | |
| 193 rm -rf alpha_rarefaction | |
| 194 | |
| 195 # beta_diversity | |
| 196 beta_diversity.py \ | |
| 197 --input_path 'test-data/beta_diversity/otu_table.biom' \ | |
| 198 -o beta_diversity_1 \ | |
| 199 --metrics 'unweighted_unifrac,weighted_unifrac' \ | |
| 200 --tree_path 'test-data/beta_diversity/rep_set.tre' | |
| 201 md5 'beta_diversity_1/unweighted_unifrac_otu_table.txt' | |
| 202 md5 'beta_diversity_1/weighted_unifrac_otu_table.txt' | |
| 203 rm -rf beta_diversity_1 | |
| 204 | |
| 205 beta_diversity.py \ | |
| 206 --input_path 'test-data/beta_diversity/otu_table.biom' \ | |
| 207 -o beta_diversity_2 \ | |
| 208 --metrics 'abund_jaccard,binary_chisq,binary_chord,binary_euclidean,binary_hamming,binary_jaccard,binary_lennon,binary_ochiai,binary_pearson,binary_sorensen_dice,bray_curtis,canberra,chisq,chord,euclidean,gower,hellinger,kulczynski,manhattan,morisita_horn,pearson,soergel,spearman_approx,specprof,unifrac_g,unifrac_g_full_tree,unweighted_unifrac,unweighted_unifrac_full_tree,weighted_normalized_unifrac,weighted_unifrac' \ | |
| 209 --tree_path 'test-data/beta_diversity/rep_set.tre' | |
| 210 md5 'beta_diversity_2/canberra_otu_table.txt' | |
| 211 md5 'beta_diversity_2/pearson_otu_table.txt' | |
| 212 rm -rf beta_diversity_2 | |
| 213 | |
| 214 # jackknifed_beta_diversity | |
| 215 jackknifed_beta_diversity.py \ | |
| 216 --otu_table_fp 'test-data/jackknifed_beta_diversity/otu_table.biom' \ | |
| 217 --mapping_fp 'test-data/jackknifed_beta_diversity/map.txt' \ | |
| 218 -o jackknifed_beta_diversity \ | |
| 219 --seqs_per_sample '10' \ | |
| 220 --tree_fp 'test-data/jackknifed_beta_diversity/rep_set.tre' \ | |
| 221 --master_tree 'consensus' \ | |
| 222 --parallel | |
| 223 rm -rf jackknifed_beta_diversity | |
| 224 | |
| 225 # beta_diversity_through_plots | |
| 226 beta_diversity_through_plots.py \ | |
| 227 --otu_table_fp 'test-data/beta_diversity_through_plots/otu_table.biom' \ | |
| 228 --mapping_fp 'test-data/beta_diversity_through_plots/map.txt' \ | |
| 229 --output_dir beta_diversity_through_plots \ | |
| 230 --tree_fp 'test-data/beta_diversity_through_plots/rep_set.tre' \ | |
| 231 --parallel | |
| 232 cp beta_diversity_through_plots/unweighted_unifrac_dm.txt 'test-data/beta_diversity_through_plots/' | |
| 233 cp beta_diversity_through_plots/unweighted_unifrac_pc.txt 'test-data/beta_diversity_through_plots/' | |
| 234 cp beta_diversity_through_plots/weighted_unifrac_dm.txt 'test-data/beta_diversity_through_plots/' | |
| 235 cp beta_diversity_through_plots/weighted_unifrac_pc.txt 'test-data/beta_diversity_through_plots/' | |
| 236 rm -rf beta_diversity_through_plots | |
| 237 | |
| 238 # assign_taxonomy | |
| 239 assign_taxonomy.py \ | |
| 240 --input_fasta_fp 'test-data/assign_taxonomy/uclust_input_seqs.fasta' \ | |
| 241 --assignment_method 'uclust' \ | |
| 242 --min_consensus_fraction '0.51' \ | |
| 243 --similarity '0.9' \ | |
| 244 --uclust_max_accepts '3' \ | |
| 245 -o assign_taxonomy_uclust | |
| 246 cp assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt 'test-data/assign_taxonomy/uclust_taxonomic_assignation.txt' | |
| 247 rm -rf assign_taxonomy_uclust | |
| 248 | |
| 249 #assign_taxonomy.py \ | |
| 250 # --input_fasta_fp 'test-data/assign_taxonomy/rdp_input_seqs.fasta' \ | |
| 251 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/rdp_id_to_taxonomy.txt' \ | |
| 252 # --assignment_method 'rdp' \ | |
| 253 # --confidence '3' \ | |
| 254 # -o assign_taxonomy_rdp | |
| 255 | |
| 256 #assign_taxonomy.py \ | |
| 257 # --input_fasta_fp 'test-data/assign_taxonomy/rtax_ref_seq_set.fna' \ | |
| 258 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/rtax_id_to_taxonomy.txt' \ | |
| 259 # --assignment_method 'rtax' \ | |
| 260 # --read_1_seqs_fp 'test-data/assign_taxonomy/read_1.seqs.fna' \ | |
| 261 # --read_2_seqs_fp 'test-data/assign_taxonomy/read_2.seqs.fna' \ | |
| 262 # --single_ok \ | |
| 263 # --no_single_ok_generic \ | |
| 264 # --read_id_regex "\S+\s+(\S+)" \ | |
| 265 # --amplicon_id_regex "(\S+)\s+(\S+?)\/" \ | |
| 266 # --header_id_regex "\S+\s+(\S+?)\/" \ | |
| 267 # -o assign_taxonomy_rtax | |
| 268 #ls assign_taxonomy_rtax | |
| 269 | |
| 270 #assign_taxonomy.py \ | |
| 271 # --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ | |
| 272 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \ | |
| 273 # --assignment_method 'mothur' \ | |
| 274 # --confidence 0.5 \ | |
| 275 # -o assign_taxonomy_mothur | |
| 276 #ls assign_taxonomy_mothur | |
| 277 | |
| 278 assign_taxonomy.py \ | |
| 279 --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ | |
| 280 --assignment_method 'sortmerna' \ | |
| 281 --min_consensus_fraction "0.51" \ | |
| 282 --similarity "0.9" \ | |
| 283 --sortmerna_e_value "1.0" \ | |
| 284 --sortmerna_coverage "0.9" \ | |
| 285 --sortmerna_best_N_alignments "5" \ | |
| 286 -o assign_taxonomy_sortmerna | |
| 287 cp assign_taxonomy_sortmerna/sortmerna_map.blast 'test-data/assign_taxonomy/sortmerna_map.blast' | |
| 288 cp assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt 'test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt' | |
| 289 rm -rf assign_taxonomy_sortmerna | |
| 290 <<<<<<< HEAD | |
| 291 ======= | |
| 292 | |
| 293 # filter_otus_from_otu_table | |
| 294 filter_otus_from_otu_table.py \ | |
| 295 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \ | |
| 296 --min_count '2' \ | |
| 297 --max_count '1000' \ | |
| 298 --min_samples '5' \ | |
| 299 --max_samples '350' \ | |
| 300 --output_fp 'test-data/filter_otus_from_otu_table/filtered_otu_table.biom' | |
| 301 | |
| 302 filter_otus_from_otu_table.py \ | |
| 303 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \ | |
| 304 --otu_ids_to_exclude_fp 'test-data/filter_otus_from_otu_table/chimeric_otus.txt' \ | |
| 305 --output_fp 'test-data/filter_otus_from_otu_table/chimera_filtered_otu_table.biom' | |
| 306 | |
| 307 filter_otus_from_otu_table.py \ | |
| 308 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \ | |
| 309 --otu_ids_to_exclude_fp 'test-data/filter_otus_from_otu_table/chimeric_otus.txt' \ | |
| 310 --negate_ids_to_exclude \ | |
| 311 --output_fp 'test-data/filter_otus_from_otu_table/chimera_picked_otu_table.biom' | |
| 312 >>>>>>> Update of the Qiime_core generate_test_data script |
