Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison install_amplicon_analysis.sh @ 39:d19fca1c009c draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit c3970ae16b02774148dbf950880f07255734a9a8-dirty
| author | pjbriggs |
|---|---|
| date | Wed, 17 Oct 2018 08:14:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 38:7b2c40450792 | 39:d19fca1c009c |
|---|---|
#!/bin/sh
#
# Prototype script to set up a conda environment with the
# dependencies needed for the Amplicon_analysis_pipeline
# script
#
# Abort on the first unhandled command failure. The option is set
# here rather than on the shebang line so that it still applies when
# the script is started as "sh <script>".
set -e
#
# Handle command line
#
# Print the usage message on stdout.
usage()
{
  # Quote $0 so a script path containing whitespace is reported intact
  echo "Usage: $(basename "$0") [DIR]"
  echo ""
  echo "Installs the Amplicon_analysis_pipeline package plus"
  echo "dependencies in directory DIR (or current directory "
  echo "if DIR not supplied)"
}
# Process the optional first argument: either a request for help
# or the directory to install into.
if [ -n "$1" ] ; then
  # Check if help was requested
  case "$1" in
    --help|-h)
      usage
      exit 0
      ;;
  esac
  # Assume it's the installation directory. The argument is quoted so
  # that paths containing whitespace work, and the script stops with
  # a message if the directory cannot be entered (otherwise the
  # installation would end up in the wrong place).
  cd "$1" || {
    echo "ERROR cannot change to directory '$1'" >&2
    exit 1
  }
fi
# Versions
PIPELINE_VERSION=1.2.3
RDP_CLASSIFIER_VERSION=2.2
# Directories (all derived from the directory the script runs in)
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
# Name and location of the conda environment for the pipeline
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
#
# Functions
#
# Report failure and terminate script
#
# Arguments: $@ - words making up the error message
# Outputs:   "ERROR <message>" on stderr; the blank lines and the
#            "<script>: installation failed" line on stdout
# Exits the script with status 1 (does not return)
fail()
{
  echo ""
  # Quote the message so it is printed as given instead of being
  # word-split and glob-expanded by the shell
  echo "ERROR $*" >&2
  echo ""
  echo "$(basename "$0"): installation failed"
  exit 1
}
#
# Rewrite the shebangs in the installed conda scripts
# to remove the full path to conda 'bin' directory
#
# Globals: CONDA_BIN (read)
# Every regular file under CONDA_BIN is edited in place with sed:
# a line starting with "#!<CONDA_BIN>/" has that prefix replaced
# by "#!/usr/bin/env ".
rewrite_conda_shebangs()
{
  local pattern
  pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
  # The directory is quoted so that an installation path containing
  # whitespace is passed to find as a single argument
  find "${CONDA_BIN}" -type f -exec sed -i "$pattern" {} \;
}
#
# Install conda
#
# Downloads the Miniconda2 installer into a temporary directory and
# runs it with "-b -p CONDA_DIR", then rewrites the shebangs of the
# installed scripts and prepends CONDA_BIN to PATH. If CONDA_DIR
# already exists a notice is printed on stderr and nothing else is
# done (PATH is left unchanged in that case).
#
# Globals: CONDA_DIR, CONDA_BIN (read); PATH (modified)
install_conda()
{
  echo "++++++++++++++++"
  echo "Installing conda"
  echo "++++++++++++++++"
  if [ -e "${CONDA_DIR}" ] ; then
    echo "*** $CONDA_DIR already exists ***" >&2
    return
  fi
  # Declare separately from the assignments so that a failing pwd or
  # mktemp is not hidden by the exit status of 'local'
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
  bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}"
  echo "Installed conda in ${CONDA_DIR}"
  # Update the installation files
  # This is to avoid problems when the length of the installation
  # directory path exceeds the limit for the shebang statement
  # in the conda files
  echo ""
  printf '%s' "Rewriting conda shebangs..."
  rewrite_conda_shebangs
  echo "ok"
  printf '%s' "Adding conda bin to PATH..."
  PATH="${CONDA_BIN}:$PATH"
  echo "ok"
  cd "$cwd"
  # Remove the whole working directory in one step (this also deals
  # with any hidden files, which a "$wd/*" glob would leave behind)
  rm -rf -- "${wd:?}"
}
#
# Create conda environment
#
# Writes an environment.yml file into a temporary directory and uses
# it to create the conda environment ENV_NAME with the packages
# needed by the pipeline.
#
# Globals: CONDA, ENV_NAME, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
install_conda_packages()
{
  echo "+++++++++++++++++++++++++"
  echo "Installing conda packages"
  echo "+++++++++++++++++++++++++"
  # Declare separately from the assignments so that a failing pwd or
  # mktemp is not hidden by the exit status of 'local'
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  # Unquoted delimiter: the shell variables below are expanded when
  # the file is written
  cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.11
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.5.0
  - fastqc=0.11.3
  - qiime=1.8.0
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.4
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=1.1.3
  # Need to explicitly specify libgfortran
  # version (otherwise get version incompatible
  # with numpy=1.7.1)
  - libgfortran=1.0
  # Compilers needed to build R
  - gcc_linux-64
  - gxx_linux-64
  - gfortran_linux-64
EOF
  "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml
  echo "Created conda environment in ${ENV_DIR}"
  cd "$cwd"
  # Remove the whole working directory in one step (this also deals
  # with any hidden files, which a "$wd/*" glob would leave behind)
  rm -rf -- "${wd:?}"
}
#
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package)
#
# Each package is skipped when the file it provides is already
# present under BIN_DIR.
#
# Globals: BIN_DIR (read)
install_non_conda_packages()
{
  echo "+++++++++++++++++++++++++++++"
  echo "Installing non-conda packages"
  echo "+++++++++++++++++++++++++++++"
  # Temporary working directory (created once; it is removed again
  # and the starting directory restored before the function returns)
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  # Amplicon analysis pipeline
  printf '%s' "Installing Amplicon_analysis_pipeline..."
  if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
    echo "already installed"
  else
    install_amplicon_analysis_pipeline
    echo "ok"
  fi
  # ChimeraSlayer
  printf '%s' "Installing ChimeraSlayer..."
  if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
    echo "already installed"
  else
    install_chimeraslayer
    echo "ok"
  fi
  # Uclust
  printf '%s' "Installing uclust for QIIME/pyNAST..."
  if [ -e "${BIN_DIR}/uclust" ] ; then
    echo "already installed"
  else
    install_uclust
    echo "ok"
  fi
  # R 3.2.1
  printf '%s' "Checking for R 3.2.1..."
  if [ -e "${BIN_DIR}/R" ] ; then
    echo "R already installed"
  else
    echo "not found"
    install_R_3_2_1
  fi
  cd "$cwd"
  rm -rf -- "${wd:?}"
}
#
# Amplicon analysis pipeline
#
# Downloads the pipeline release v${PIPELINE_VERSION} from GitHub,
# copies its *.sh scripts and the uc2otutab directory into
# ${TOP_DIR}/share, and generates two launcher scripts in BIN_DIR:
#   Amplicon_analysis_pipeline.sh - sets up the environment and runs
#                                   the pipeline driver script
#   install_reference_data.sh     - runs References.sh in a directory
#                                   chosen by the user
#
# Globals: TOP_DIR, BIN_DIR, CONDA_BIN, ENV_NAME, PIPELINE_VERSION,
#          RDP_CLASSIFIER_VERSION (read); INSTALL_DIR (written)
install_amplicon_analysis_pipeline()
{
  # Single temporary working directory (declared separately from the
  # assignments so that failures of pwd/mktemp are not hidden)
  local cwd wd f
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  wget -q "https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz"
  tar zxf "v${PIPELINE_VERSION}.tar.gz"
  cd "Amplicon_analysis-${PIPELINE_VERSION}"
  INSTALL_DIR="${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}"
  mkdir -p "$INSTALL_DIR"
  ln -s "$INSTALL_DIR" "${TOP_DIR}/share/amplicon_analysis_pipeline"
  for f in *.sh ; do
    /bin/cp "$f" "$INSTALL_DIR"
  done
  /bin/cp -r uc2otutab "$INSTALL_DIR"
  mkdir -p "${BIN_DIR}"
  # Launcher for the pipeline. The heredoc delimiter is unquoted, so
  # the installation paths are filled in now, while anything written
  # with a backslash-escaped dollar is left for the launcher to
  # expand when it runs.
  cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${INSTALL_DIR}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
# (quoted so that arguments containing whitespace are kept intact)
"$INSTALL_DIR/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
  chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
  # Helper for installing the reference data
  cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
#
# Stop on the first failing command. This is set here because options
# placed after the interpreter name on an env shebang line are not
# handled portably.
set -e
function usage() {
  echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
  usage
  exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
  usage
  echo ""
  echo "Install reference data into DIR"
  exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
  echo "Making directory \$1"
  mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"$INSTALL_DIR/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
  chmod 0755 "${BIN_DIR}/install_reference_data.sh"
  cd "$cwd"
  # Remove the whole working directory, hidden files included
  rm -rf -- "${wd:?}"
}
#
# ChimeraSlayer
#
# Downloads the microbiomeutil 2010-04-29 archive, copies its
# ChimeraSlayer directory into ${TOP_DIR}/share and generates a
# launcher script ${BIN_DIR}/ChimeraSlayer.pl that runs the
# installed ChimeraSlayer.pl.
#
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_chimeraslayer()
{
  # Declare separately from the assignments so that a failing pwd or
  # mktemp is not hidden by the exit status of 'local'
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
  tar zxf microbiomeutil_2010-04-29.tar.gz
  cd microbiomeutil_2010-04-29
  INSTALL_DIR="${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29"
  mkdir -p "$INSTALL_DIR"
  ln -s "$INSTALL_DIR" "${TOP_DIR}/share/microbiome_chimeraslayer"
  /bin/cp -r ChimeraSlayer "$INSTALL_DIR"
  # Make sure the launcher directory exists even when this is the
  # first package to be installed
  mkdir -p "${BIN_DIR}"
  # The dollar signs that must survive into the launcher are escaped;
  # an unescaped "\$@" would be expanded here, at install time, and
  # the launcher would then never forward its own arguments.
  cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="$INSTALL_DIR:\$PATH"
"$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
  chmod 0755 "${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl"
  chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
  cd "$cwd"
  # Remove the whole working directory, hidden files included
  rm -rf -- "${wd:?}"
}
#
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
#
# Downloads the uclust 1.2.22 binary, installs it under
# ${TOP_DIR}/share/uclust-1.2.22 and links it into BIN_DIR.
#
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_uclust()
{
  # Single temporary working directory (declared separately from the
  # assignments so that failures of pwd/mktemp are not hidden)
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
  INSTALL_DIR="${TOP_DIR}/share/uclust-1.2.22"
  mkdir -p "$INSTALL_DIR"
  ln -s "$INSTALL_DIR" "${TOP_DIR}/share/uclust"
  /bin/mv uclustq1.2.22_i86linux64 "${INSTALL_DIR}/uclust"
  chmod 0755 "${INSTALL_DIR}/uclust"
  # Ensure BIN_DIR is a directory before linking into it; otherwise
  # ln would create a link called BIN_DIR pointing at the binary
  mkdir -p "${BIN_DIR}"
  ln -s "${INSTALL_DIR}/uclust" "${BIN_DIR}/uclust"
  cd "$cwd"
  # Remove the whole working directory, hidden files included
  rm -rf -- "${wd:?}"
}
#
# R 3.2.1
# Can't use version from conda due to dependency conflicts
#
# Builds R 3.2.1 from source with the conda environment activated
# and installs it with TOP_DIR as the prefix. Output of the unpack,
# configure and make steps goes to INSTALL.log files inside the
# temporary build directory.
#
# Globals: CONDA_BIN, ENV_NAME, TOP_DIR (read); INSTALL_DIR (written)
install_R_3_2_1()
{
  . "${CONDA_BIN}/activate" "${ENV_NAME}"
  # Declare separately from the assignments so that a failing pwd or
  # mktemp is not hidden by the exit status of 'local'
  local cwd wd
  cwd=$(pwd)
  wd=$(mktemp -d)
  cd "$wd"
  printf '%s' "Fetching R 3.2.1 source code..."
  # Fetch over https: the downloaded source is compiled and installed
  wget -q https://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz
  echo "ok"
  INSTALL_DIR="${TOP_DIR}"
  mkdir -p "$INSTALL_DIR"
  printf '%s' "Unpacking source code..."
  tar xzf R-3.2.1.tar.gz >INSTALL.log 2>&1
  echo "ok"
  cd R-3.2.1
  printf '%s' "Running configure..."
  ./configure --prefix="$INSTALL_DIR" --with-x=no --with-readline=no >>INSTALL.log 2>&1
  echo "ok"
  printf '%s' "Running make..."
  make >>INSTALL.log 2>&1
  echo "ok"
  printf '%s' "Running make install..."
  make install >>INSTALL.log 2>&1
  echo "ok"
  cd "$cwd"
  # Remove the whole build directory, hidden files included
  rm -rf -- "${wd:?}"
  . "${CONDA_BIN}/deactivate"
}
#
# Set up the pipeline environment
#
# Links programs from the conda environment to the names and
# locations used in this installation (vsearch113, fasta-splitter.pl
# and the versioned RDP classifier jar) and writes the qiime_config
# file. Items that already exist are left alone; a missing source
# file in the conda environment is reported via fail().
#
# Globals: TOP_DIR, BIN_DIR, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
setup_pipeline_environment()
{
  echo "+++++++++++++++++++++++++++++++"
  echo "Setting up pipeline environment"
  echo "+++++++++++++++++++++++++++++++"
  # Make sure the directory receiving the links exists
  mkdir -p "${BIN_DIR}"
  # vsearch113
  printf '%s' "Setting up vsearch113..."
  if [ -e "${BIN_DIR}/vsearch113" ] ; then
    echo "already exists"
  elif [ ! -e "${ENV_DIR}/bin/vsearch" ] ; then
    echo "failed"
    fail "vsearch not found"
  else
    ln -s "${ENV_DIR}/bin/vsearch" "${BIN_DIR}/vsearch113"
    echo "ok"
  fi
  # fasta_splitter.pl
  printf '%s' "Setting up fasta_splitter.pl..."
  if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
    echo "already exists"
  elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
    echo "failed"
    fail "fasta-splitter.pl not found"
  else
    ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
    echo "ok"
  fi
  # rdp_classifier.jar
  local rdp_classifier_jar="rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
  printf '%s' "Setting up rdp_classifier.jar..."
  if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
    echo "already exists"
  elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
    echo "failed"
    fail "rdp_classifier.jar not found"
  else
    mkdir -p "${TOP_DIR}/share/rdp_classifier"
    ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
    echo "ok"
  fi
  # qiime_config
  printf '%s' "Setting up qiime_config..."
  if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
    echo "already exists"
  else
    mkdir -p "${TOP_DIR}/qiime"
    cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir ${ENV_DIR}/bin
EOF-qiime-config
    echo "ok"
  fi
}
#
# Remove the compilers from the conda environment
# Not sure if this step is necessary
#
# Globals: CONDA, ENV_NAME (read)
remove_conda_compilers()
{
  echo "+++++++++++++++++++++++++++++++++++++++++"
  echo "Removing compilers from conda environment"
  echo "+++++++++++++++++++++++++++++++++++++++++"
  # Expansions are quoted so that an installation path containing
  # whitespace is still treated as a single command word
  "${CONDA}" remove -y -n "${ENV_NAME}" gcc_linux-64 gxx_linux-64 gfortran_linux-64
}
#
# Top level script does the installation
#
# Refuses to run if the target directory already exists, then
# performs the installation steps in order and prints instructions
# for using the result.
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
# Quoted so that the check also works for paths containing whitespace
if [ -e "${TOP_DIR}" ] ; then
  fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
remove_conda_compilers
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename "$0"): finished"
##
#
