# HG changeset patch # User blankenberg # Date 1570571410 14400 # Node ID d2db71f5eeb8352d8178f4da41d52cbc8e84d45b Create PLINK1.9 diff -r 000000000000 -r d2db71f5eeb8 plink.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plink.xml Tue Oct 08 17:50:10 2019 -0400 @@ -0,0 +1,11859 @@ + + + plink + + + + + + plink --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + denote an optional modifier (or if '|' is present, a set + of mutually exclusive optional modifiers). Use the EXACT text in the + definition, e.g. '--dummy acgt'. 
+ * There's one exception to the angle brackets/exact text rule: when an angle + bracket term ends with '=[value]', '[value]' designates a variable + parameter. + * {curly braces} denote an optional parameter, where the text between the + braces describes its nature. + * An ellipsis (...) indicates that you may enter multiple parameters of the + specified type. + + plink [input flag(s)...] {command flag(s)...} {other flag(s)...} + plink --help {flag name(s)...} + +Most PLINK runs require exactly one main input fileset. The following flags +are available for defining its form and location: + + --bfile {prefix} : Specify .bed + .bim + .fam prefix (default 'plink'). + --bed [filename] : Specify full name of .bed file. + --bim [filename] : Specify full name of .bim file. + --fam [filename] : Specify full name of .fam file. + + --keep-autoconv : With --file/--tfile/--lfile/--vcf/--bcf/--data/--23file, + don't delete autogenerated binary fileset at end of run. + + --file {prefix} : Specify .ped + .map filename prefix (default 'plink'). + --ped [filename] : Specify full name of .ped file. + --map [filename] : Specify full name of .map file. + + --no-fid : .fam/.ped file does not contain column 1 (family ID). + --no-parents : .fam/.ped file does not contain columns 3-4 (parents). + --no-sex : .fam/.ped file does not contain column 5 (sex). + --no-pheno : .fam/.ped file does not contain column 6 (phenotype). + + --tfile {prefix} : Specify .tped + .tfam filename prefix (default 'plink'). + --tped [fname] : Specify full name of .tped file. + --tfam [fname] : Specify full name of .tfam file. + + --lfile {prefix} : Specify .lgen + .map + .fam (long-format fileset) prefix. + --lgen [fname] : Specify full name of .lgen file. + --reference [fn] : Specify default allele file accompanying .lgen input. + --allele-count : When used with --lfile/--lgen + --reference, specifies + that the .lgen file contains reference allele counts. 
+ + --vcf [filename] : Specify full name of .vcf or .vcf.gz file. + --bcf [filename] : Specify full name of BCF2 file. + + --data {prefix} : Specify Oxford .gen + .sample prefix (default 'plink'). + --gen [filename] : Specify full name of .gen or .gen.gz file. + --bgen [f] : Specify full name of .bgen file. + --sample [fname] : Specify full name of .sample file. + + --23file [fname] {FID} {IID} {sex} {pheno} {pat. ID} {mat. ID} : + Specify 23andMe input file. + + --grm-gz {prfx} : Specify .grm.gz + .grm.id (GCTA rel. matrix) prefix. + --grm-bin {prfx} : Specify .grm.bin + .grm.N.bin + .grm.id (GCTA triangular + binary relationship matrix) filename prefix. + + --dummy [sample ct] [SNP ct] {missing geno freq} {missing pheno freq} + + This generates a fake input dataset with the specified number of samples + and SNPs. By default, the missing genotype and phenotype frequencies are + zero, and genotypes are As and Bs (change the latter with + 'acgt'/'1234'/'12'). The 'scalar-pheno' modifier causes a normally + distributed scalar phenotype to be generated instead of a binary one. + + --simulate [simulation parameter file] + --simulate-qt [simulation parameter file] + --simulate generates a fake input dataset with disease-associated SNPs, + while --simulate-qt generates a dataset with quantitative trait loci. + +Output files have names of the form 'plink.{extension}' by default. You can +change the 'plink' prefix with + + --out [prefix] : Specify prefix for output files. + +Most runs also require at least one of the following commands: + + --make-bed + Create a new binary fileset. Unlike the automatic text-to-binary + converters (which only heed chromosome filters), this supports all of + PLINK's filtering flags. + --make-just-bim + --make-just-fam + Variants of --make-bed which only write a new .bim or .fam file. Can be + used with only .bim/.fam input. + USE THESE CAUTIOUSLY. 
It is very easy to desynchronize your binary + genotype data and your .bim/.fam indexes if you use these commands + improperly. If you have any doubt, stick with --make-bed. + + --recode [output format] <01 | 12> + + Create a new text fileset with all filters applied. The following output + formats are supported: + * '23': 23andMe 4-column format. This can only be used on a single + sample's data (--keep may be handy), and does not support multicharacter + allele codes. + * 'A': Sample-major additive (0/1/2) coding, suitable for loading from R. + If you need uncounted alleles to be named in the header line, add the + 'include-alt' modifier. + * 'AD': Sample-major additive (0/1/2) + dominant (het=1/hom=0) coding. + Also supports 'include-alt'. + * 'A-transpose': Variant-major 0/1/2. + * 'beagle': Unphased per-autosome .dat and .map files, readable by early + BEAGLE versions. + * 'beagle-nomap': Single .beagle.dat file. + * 'bimbam': Regular BIMBAM format. + * 'bimbam-1chr': BIMBAM format, with a two-column .pos.txt file. Does not + support multiple chromosomes. + * 'fastphase': Per-chromosome fastPHASE files, with + .chr-[chr #].recode.phase.inp filename extensions. + * 'fastphase-1chr': Single .recode.phase.inp file. Does not support + multiple chromosomes. + * 'HV': Per-chromosome Haploview files, with .chr-[chr #][.ped + .info] + filename extensions. + * 'HV-1chr': Single Haploview .ped + .info file pair. Does not support + multiple chromosomes. + * 'lgen': PLINK 1 long-format (.lgen + .fam + .map), loadable with --lfile. + * 'lgen-ref': .lgen + .fam + .map + .ref, loadable with --lfile + + --reference. + * 'list': Single genotype-based list, up to 4 lines per variant. To omit + nonmale genotypes on the Y chromosome, add the 'omit-nonmale-y' modifier. + * 'rlist': .rlist + .fam + .map fileset, where the .rlist file is a + genotype-based list which omits the most common genotype for each + variant. Also supports 'omit-nonmale-y'. 
+ * 'oxford': Oxford-format .gen + .sample. With the 'gen-gz' modifier, the + .gen file is gzipped. + * 'ped': PLINK 1 sample-major (.ped + .map), loadable with --file. + * 'compound-genotypes': Same as 'ped', except that the space between each + pair of same-variant allele codes is removed. + * 'structure': Structure-format. + * 'transpose': PLINK 1 variant-major (.tped + .tfam), loadable with + --tfile. + * 'vcf', 'vcf-fid', 'vcf-iid': VCFv4.2. 'vcf-fid' and 'vcf-iid' cause + family IDs or within-family IDs respectively to be used for the sample + IDs in the last header row, while 'vcf' merges both IDs and puts an + underscore between them. If the 'bgz' modifier is added, the VCF file is + block-gzipped. + The A2 allele is saved as the reference and normally flagged as not based + on a real reference genome (INFO:PR). When it is important for reference + alleles to be correct, you'll also want to include --a2-allele and + --real-ref-alleles in your command. + In addition, + * The '12' modifier causes A1 (usually minor) alleles to be coded as '1' + and A2 alleles to be coded as '2', while '01' maps A1 -> 0 and A2 -> 1. + * The 'tab' modifier makes the output mostly tab-delimited instead of + mostly space-delimited. 'tabx' and 'spacex' force all tabs and all + spaces, respectively. + + --flip-scan + (alias: --flipscan) + LD-based scan for case/control strand inconsistency. + + --write-covar + If a --covar file is loaded, --make-bed/--make-just-fam and --recode + automatically generate an updated version (with all filters applied). + However, if you do not wish to simultaneously generate a new genotype file, + you can use --write-covar to just produce a pruned covariate file. + + --write-cluster + If clusters are specified with --within/--family, this generates a new + cluster file (with all filters applied). The 'omit-unassigned' modifier + causes unclustered samples to be omitted from the file; otherwise their + cluster is 'NA'. 
+ + --write-set + --set-table + If sets have been defined, --write-set dumps 'END'-terminated set + membership lists to {output prefix}.set, while --set-table writes a + variant-by-set membership table to {output prefix}.set.table. + + --merge [.ped filename] [.map filename] + --merge [text fileset prefix] + --bmerge [.bed filename] [.bim filename] [.fam filename] + --bmerge [binary fileset prefix] + Merge the given fileset with the initially loaded fileset, writing the + result to {output prefix}.bed + .bim + .fam. (It is no longer necessary to + simultaneously specify --make-bed.) + --merge-list [filename] + Merge all filesets named in the text file with the reference fileset, if + one was specified. (However, this can also be used *without* a reference; + in that case, the newly created fileset is then treated as the reference by + most other PLINK operations.) The text file is interpreted as follows: + * If a line contains only one name, it is assumed to be the prefix for a + binary fileset. + * If a line contains exactly two names, they are assumed to be the full + filenames for a text fileset (.ped first, then .map). + * If a line contains exactly three names, they are assumed to be the full + filenames for a binary fileset (.bed, then .bim, then .fam). + + --write-snplist + --list-23-indels + --write-snplist writes a .snplist file listing the names of all variants + which pass the filters and inclusion thresholds you've specified, while + --list-23-indels writes the subset with 23andMe-style indel calls (D/I + allele codes). + + --list-duplicate-vars + --list-duplicate-vars writes a .dupvar file describing all groups of + variants with matching positions and allele codes. + * By default, A1/A2 allele assignments are ignored; use 'require-same-ref' + to override this. + * Normally, the report contains position and allele codes. To remove them + (and produce a file directly usable with e.g. --extract/--exclude), use + 'ids-only'. 
Note that this command will fail in 'ids-only' mode if any + of the reported IDs are not unique. + * 'suppress-first' causes the first variant ID in each group to be omitted + from the report. + + --freq + --freqx + --freq generates a basic allele frequency (or count, if the 'counts' + modifier is present) report. This can be combined with --within/--family + to produce a cluster-stratified allele frequency/count report instead, or + the 'case-control' modifier to report case and control allele frequencies + separately. + --freqx generates a more detailed genotype count report, designed for use + with --read-freq. + + --missing + Generate sample- and variant-based missing data reports. If clusters are + defined, the variant-based report is cluster-stratified. 'gz' causes the + output files to be gzipped. + + --test-mishap + Check for association between missing calls and flanking haplotypes. + + --hardy + Generate a Hardy-Weinberg exact test p-value report. (This does NOT + simultaneously filter on the p-value any more; use --hwe for that.) With + the 'midp' modifier, the test applies the mid-p adjustment described in + Graffelman J, Moreno V (2013) The mid p-value in exact tests for + Hardy-Weinberg Equilibrium. + + --mendel + Generate a Mendel error report. The 'summaries-only' modifier causes the + .mendel file (listing every single error) to be skipped. + + --het + --ibc + Estimate inbreeding coefficients. --het reports method-of-moments + estimates, while --ibc calculates all three values described in Yang J, Lee + SH, Goddard ME and Visscher PM (2011) GCTA: A Tool for Genome-wide Complex + Trait Analysis. (That paper also describes the relationship matrix + computation we reimplement.) + * These functions require decent MAF estimates. If there are very few + samples in your immediate fileset, --read-freq is practically mandatory + since imputed MAFs are wildly inaccurate in that case. + * They also assume the marker set is in approximate linkage equilibrium. 
+ * By default, --het omits the n/(n-1) multiplier in Nei's expected + homozygosity formula. The 'small-sample' modifier causes it to be + included, while forcing --het to use MAFs imputed from founders in the + immediate dataset. + + --check-sex {female max F} {male min F} + --check-sex ycount {female max F} {male min F} {female max Y obs} + {male min Y obs} + --check-sex y-only {female max Y obs} {male min Y obs} + --impute-sex {female max F} {male min F} + --impute-sex ycount {female max F} {male min F} {female max Y obs} + {male min Y obs} + --impute-sex y-only {female max Y obs} {male min Y obs} + --check-sex normally compares sex assignments in the input dataset with + those imputed from X chromosome inbreeding coefficients. + * Make sure that the X chromosome pseudo-autosomal region has been split + off (with e.g. --split-x) before using this. + * You also need decent MAF estimates (so, with very few samples in your + immediate fileset, use --read-freq), and your marker set should be in + approximate linkage equilibrium. + * By default, F estimates smaller than 0.2 yield female calls, and values + larger than 0.8 yield male calls. If you pass numeric parameter(s) to + --check-sex, the first two control these thresholds. + There are now two modes which consider Y chromosome data. + * In 'ycount' mode, gender is still imputed from the X chromosome, but + female calls are downgraded to ambiguous whenever more than 0 nonmissing + Y genotypes are present, and male calls are downgraded when fewer than 0 + are present. (Note that these are counts, not rates.) These thresholds + are controllable with --check-sex ycount's optional 3rd and 4th numeric + parameters. + * In 'y-only' mode, gender is imputed from nonmissing Y genotype counts. + The male minimum threshold defaults to 1 instead of zero in this case. + --impute-sex changes sex assignments to the imputed values, and is + otherwise identical to --check-sex. 
It must be used with + --make-bed/--recode/--write-covar. + + --fst + (alias: --Fst) + Estimate Wright's Fst for each autosomal diploid variant using the method + introduced in Weir BS, Cockerham CC (1984) Estimating F-statistics for the + analysis of population structure, given a set of subpopulations defined via + --within. Raw and weighted global means are also reported. + * If you're interested in the global means, it is usually best to perform + this calculation on a marker set in approximate linkage equilibrium. + * If you have only two subpopulations, you can represent them with + case/control status and use the 'case-control' modifier. + + --indep [window size] [step size (variant ct)] [VIF threshold] + --indep-pairwise [window size] [step size (variant ct)] [r^2 threshold] + --indep-pairphase [window size] [step size (variant ct)] [r^2 threshold] + Generate a list of markers in approximate linkage equilibrium. With the + 'kb' modifier, the window size is in kilobase instead of variant count + units. (Pre-'kb' space is optional, i.e. '--indep-pairwise 500 kb 5 0.5' + and '--indep-pairwise 500kb 5 0.5' have the same effect.) + Note that you need to rerun PLINK using --extract or --exclude on the + .prune.in/.prune.out file to apply the list to another computation. + + --r + + --r2 + + LD statistic reports. --r yields raw inter-variant correlations, while + --r2 reports their squares. You can request results for all pairs in + matrix format (if you specify 'bin' or one of the shape modifiers), all + pairs in table format ('inter-chr'), or a limited window in table format + (default). + * The 'gz' modifier causes the output text file to be gzipped. + * 'bin' causes the output matrix to be written in double-precision binary + format, while 'bin4' specifies single-precision binary. The matrix is + square if no shape is explicitly specified. + * By default, text matrices are tab-delimited; 'spaces' switches this. 
+ * 'in-phase' adds a column with in-phase allele pairs to table-formatted + reports. (This cannot be used with very long allele codes.) + * 'dprime' adds the absolute value of Lewontin's D-prime statistic to + table-formatted reports, and forces both r/r^2 and D-prime to be based on + the maximum likelihood solution to the cubic equation discussed in Gaunt + T, Rodriguez S, Day I (2007) Cubic exact solutions for the estimation of + pairwise haplotype frequencies. + 'dprime-signed' keeps the sign, while 'd' skips division by D_{max}. + * 'with-freqs' adds MAF columns to table-formatted reports. + * Since the resulting file can easily be huge, you're required to add the + 'yes-really' modifier when requesting an unfiltered, non-distributed all + pairs computation on more than 400k variants. + * These computations can be subdivided with --parallel (even when the + 'square' modifier is active). + --ld [variant ID] [variant ID] + This displays haplotype frequencies, r^2, and D' for a single pair of + variants. When there are multiple biologically possible solutions to the + haplotype frequency cubic equation, all are displayed (instead of just the + maximum likelihood solution identified by --r/--r2), along with HWE exact + test statistics. + + --show-tags [filename] + --show-tags all + * If a file is specified, list all variants which tag at least one variant + named in the file. (This will normally be a superset of the original + list, since a variant is considered to tag itself here.) + * If 'all' mode is specified, for each variant, each *other* variant which + tags it is reported. + + --blocks + Estimate haplotype blocks, via Haploview's interpretation of the block + definition suggested by Gabriel S et al. (2002) The Structure of Haplotype + Blocks in the Human Genome. + * Normally, samples with missing phenotypes are not considered by this + computation; the 'no-pheno-req' modifier lifts this restriction. 
+ * Normally, size-2 blocks may not span more than 20kb, and size-3 blocks + are limited to 30kb. The 'no-small-max-span' modifier removes these + limits. + The .blocks file is valid input for PLINK 1.07's --hap command. However, + the --hap... family of flags has not been reimplemented in PLINK 1.9 due to + poor phasing accuracy relative to other software; for now, we recommend + using BEAGLE instead of PLINK for case/control haplotype association + analysis. (You can use '--recode beagle' to export data to BEAGLE 3.3.) + We apologize for the inconvenience, and plan to develop variants of the + --hap... flags which handle pre-phased data effectively. + + --distance <1-ibs> + + Write a lower-triangular tab-delimited table of (weighted) genomic + distances in allele count units to {output prefix}.dist, and a list of the + corresponding sample IDs to {output prefix}.dist.id. The first row of the + .dist file contains a single {genome 1-genome 2} distance, the second row + has the {genome 1-genome 3} and {genome 2-genome 3} distances in that + order, etc. + * It is usually best to perform this calculation on a marker set in + approximate linkage equilibrium. + * If the 'square' or 'square0' modifier is present, a square matrix is + written instead; 'square0' fills the upper right triangle with zeroes. + * If the 'gz' modifier is present, a compressed .dist.gz file is written + instead of a plain text file. + * If the 'bin' modifier is present, a binary (square) matrix of + double-precision floating point values, suitable for loading from R, is + instead written to {output prefix}.dist.bin. ('bin4' specifies + single-precision numbers instead.) This can be combined with 'square0' + if you still want the upper right zeroed out, or 'triangle' if you don't + want to pad the upper right at all. + * If the 'ibs' modifier is present, an identity-by-state matrix is written + to {output prefix}.mibs. '1-ibs' causes distances expressed as genomic + proportions (i.e. 
1 - IBS) to be written to {output prefix}.mdist. + Combine with 'allele-ct' if you want to generate the usual .dist file as + well. + * By default, distance rescaling in the presence of missing genotype calls + is sensitive to allele count distributions: if variant A contributes, on + average, twice as much to other pairwise distances as variant B, a + missing call at variant A will result in twice as large a missingness + correction. To turn this off (because e.g. your missing calls are highly + nonrandom), use the 'flat-missing' modifier. + * The computation can be subdivided with --parallel. + --distance-matrix + --ibs-matrix + These deprecated commands are equivalent to '--distance 1-ibs flat-missing + square' and '--distance ibs flat-missing square', respectively, except that + they generate space- instead of tab-delimited text matrices. + + --make-rel + + Write a lower-triangular variance-standardized realized relationship matrix + to {output prefix}.rel, and corresponding IDs to {output prefix}.rel.id. + * It is usually best to perform this calculation on a marker set in + approximate linkage equilibrium. + * 'square', 'square0', 'triangle', 'gz', 'bin', and 'bin4' act as they do + on --distance. + * The 'cov' modifier removes the variance standardization step, causing a + covariance matrix to be calculated instead. + * By default, the diagonal elements in the relationship matrix are based on + --ibc's Fhat1; use the 'ibc2' or 'ibc3' modifiers to base them on Fhat2 + or Fhat3 instead. + * The computation can be subdivided with --parallel. + --make-grm-gz + --make-grm-bin + --make-grm-gz writes the relationships in GCTA's original gzipped list + format, which describes one pair per line, while --make-grm-bin writes them + in GCTA 1.1+'s single-precision triangular binary format. Note that these + formats explicitly report the number of valid observations (where neither + sample has a missing call) for each pair, which is useful input for some + scripts. 
+ These computations can be subdivided with --parallel. + + --rel-cutoff {val} + (alias: --grm-cutoff) + Exclude one member of each pair of samples with relatedness greater than + the given cutoff value (default 0.025). If no later operation will cause + the list of remaining samples to be written to disk, this will save it to + {output prefix}.rel.id. + Note that maximizing the remaining sample size is equivalent to the NP-hard + maximum independent set problem, so we use a greedy algorithm instead of + guaranteeing optimality. (Use the --make-rel and --keep/--remove flags if + you want to try to do better.) + + --ibs-test {permutation count} + --groupdist {iters} {d} + Given case/control phenotype data, these commands consider three subsets of + the distance matrix: pairs of affected samples, affected-unaffected pairs, + and pairs of unaffected samples. Each of these subsets has a distribution + of pairwise genomic distances; --ibs-test uses permutation to estimate + p-values re: which types of pairs are most similar, while --groupdist + focuses on the differences between the centers of these distributions and + estimates standard errors via delete-d jackknife. + + --regress-distance {iters} {d} + Linear regression of pairwise genomic distances on pairwise average + phenotypes and vice versa, using delete-d jackknife for standard errors. A + scalar phenotype is required. + * With fewer than two parameters, d is set to {number of people}^0.6 rounded + down. With no parameters, 100k iterations are run. + --regress-rel {iters} {d} + Linear regression of pairwise genomic relationships on pairwise average + phenotypes, and vice versa. Defaults for iters and d are the same as for + --regress-distance. + + --genome + Generate an identity-by-descent report. + * It is usually best to perform this calculation on a marker set in + approximate linkage equilibrium. + * The 'rel-check' modifier excludes pairs of samples with different FIDs + from the final report. 
+ * 'full' adds raw pairwise comparison data to the report. + * The P(IBD=0/1/2) estimator employed by this command sometimes yields + numbers outside the range [0,1]; by default, these are clipped. The + 'unbounded' modifier turns off this clipping. + * Then, when PI_HAT^2 < P(IBD=2), 'nudge' adjusts the final P(IBD=0/1/2) + estimates to a theoretically possible configuration. + * The computation can be subdivided with --parallel. + + --homozyg + + --homozyg-snp [min var count] + --homozyg-kb [min length] + --homozyg-density [max inverse density (kb/var)] + --homozyg-gap [max internal gap kb length] + --homozyg-het [max hets] + --homozyg-window-snp [scanning window size] + --homozyg-window-het [max hets in scanning window hit] + --homozyg-window-missing [max missing calls in scanning window hit] + --homozyg-window-threshold [min scanning window hit rate] + These commands request a set of run-of-homozygosity reports, and allow you + to customize how they are generated. + * If you're satisfied with all the default settings described below, just + use --homozyg with no modifiers. Otherwise, --homozyg lets you change a + few binary settings: + * 'group{-verbose}' adds a report on pools of overlapping runs of + homozygosity. (Automatically set when --homozyg-match is present.) + * With 'group{-verbose}', 'consensus-match' causes pairwise segmental + matches to be called based on the variants in the pool's consensus + segment, rather than the variants in the pairwise intersection. + * Due to how the scanning window algorithm works, it is possible for a + reported ROH to be adjacent to a few homozygous variants. The 'extend' + modifier causes them to be included in the reported ROH if that + wouldn't cause a violation of the --homozyg-density bound. + * By default, segment bp lengths are calculated as [end bp position] - + [start bp position] + 1. Therefore, reports normally differ slightly + from PLINK 1.07, which does not add 1 at the end. 
For testing + purposes, you can use the 'subtract-1-from-lengths' modifier to apply + the old formula. + * By default, only runs of homozygosity containing at least 100 variants, + and of total length >= 1000 kilobases, are noted. You can change these + minimums with --homozyg-snp and --homozyg-kb, respectively. + * By default, a ROH must have at least one variant per 50 kb on average; + change this bound with --homozyg-density. + * By default, if two consecutive variants are more than 1000 kb apart, they + cannot be in the same ROH; change this bound with --homozyg-gap. + * By default, a ROH can contain an unlimited number of heterozygous calls; + you can impose a limit with --homozyg-het. + * By default, the scanning window contains 50 variants; change this with + --homozyg-window-snp. + * By default, a scanning window hit can contain at most 1 heterozygous + call and 5 missing calls; change these limits with --homozyg-window-het + and --homozyg-window-missing, respectively. + * By default, for a variant to be eligible for inclusion in a ROH, the hit + rate of all scanning windows containing the variant must be at least + 0.05; change this threshold with --homozyg-window-threshold. + + --cluster + Cluster samples using a pairwise similarity statistic (normally IBS). + * The 'cc' modifier forces every cluster to have at least one case and one + control. + * The 'group-avg' modifier causes clusters to be joined based on average + instead of minimum pairwise similarity. + * The 'missing' modifier causes clustering to be based on + identity-by-missingness instead of identity-by-state, and writes a + space-delimited identity-by-missingness matrix to disk. + * The 'only2' modifier causes only a .cluster2 file (which is valid input + for --within) to be written; otherwise 2 other files will be produced. + * By default, IBS ties are not broken in the same manner as PLINK 1.07, so + final cluster solutions tend to differ. This is generally harmless. 
+ However, to simplify testing, you can use the 'old-tiebreaks' modifier to + force emulation of the old algorithm. + + --pca {count}
+ Calculates a variance-standardized relationship matrix (use + --make-rel/--make-grm-gz/--make-grm-bin to dump it), and extracts the top + 20 principal components. + * It is usually best to perform this calculation on a marker set in + approximate linkage equilibrium. + * You can change the number of PCs by passing a numeric parameter. + * The 'header' modifier adds a header line to the .eigenvec output file. + (For compatibility with the GCTA flag of the same name, the default is no + header line.) + * The 'tabs' modifier causes the .eigenvec file(s) to be tab-delimited. + * The 'var-wts' modifier requests an additional .eigenvec.var file with PCs + expressed as variant weights instead of sample weights. + + --neighbour [n1] [n2] + (alias: --neighbor) + Report IBS distances from each sample to their n1th- to n2th-nearest + neighbors, associated Z-scores, and the identities of those neighbors. + Useful for outlier detection. + + --assoc + + --assoc + --model + + + Basic association analysis report. + Given a case/control phenotype, --assoc performs a 1df chi-square allelic + test, while --model performs 4 other tests as well (1df dominant gene + action, 1df recessive gene action, 2df genotypic, Cochran-Armitage trend). + * With 'fisher'/'fisher-midp', Fisher's exact test is used to generate + p-values. 'fisher-midp' also applies Lancaster's mid-p adjustment. + * 'perm' causes an adaptive permutation test to be performed. + * 'mperm=[value]' causes a max(T) permutation test with the specified + number of replications to be performed. + * 'perm-count' causes the permutation test report to include counts instead + of frequencies. + * 'counts' causes --assoc to report allele counts instead of frequencies. + * 'set-test' tests the significance of variant sets. Requires permutation; + can be customized with --set-p/--set-r2/--set-max. + * 'dom', 'rec', 'gen', and 'trend' force the corresponding test to be used + as the basis for --model permutation. 
(By default, the most significant + result among the allelic, dominant, and recessive tests is used.) + * 'trend-only' causes only the trend test to be performed. + Given a quantitative phenotype, --assoc normally performs a Wald test. + * In this case, the 'qt-means' modifier causes trait means and standard + deviations stratified by genotype to be reported as well. + * 'lin' causes the Lin statistic to be computed, and makes it the basis for + multiple-testing corrections and permutation tests. + Several other flags (most notably, --aperm) can be used to customize the + permutation test. + + --mh + (alias: --cmh) + --bd + --mh2 + --homog + Given a case/control phenotype and a set of clusters, --mh computes 2x2xK + Cochran-Mantel-Haenszel statistics for each variant, while --bd also + performs the Breslow-Day test for odds ratio homogeneity. Permutation and + variant set testing based on the CMH (default) or Breslow-Day (when + 'perm-bd' is present) statistic are supported. + The following similar analyses are also available: + * --mh2 swaps the roles of case/control status and cluster membership, + performing a phenotype-stratified IxJxK Cochran-Mantel-Haenszel test on + association between cluster assignments and genotypes. + * --homog executes an alternative to the Breslow-Day test, based on + partitioning of the chi-square statistic. + + --gxe {covariate index} + Given both a quantitative phenotype and a case/control covariate loaded + with --covar defining two groups, --gxe compares the regression coefficient + derived from considering only members of one group to the regression + coefficient derived from considering only members of the other. By + default, the first covariate in the --covar file defines the groups; use + e.g. '--gxe 3' to base them on the third covariate instead. + + --linear + + + --logistic + + + Multi-covariate association analysis on a quantitative (--linear) or + case/control (--logistic) phenotype. Normally used with --covar. 
+ * 'perm' normally causes an adaptive permutation test to be performed on + the main effect, while 'mperm=[value]' starts a max(T) permutation test. + * 'perm-count' causes the permutation test report to include counts instead + of frequencies. + * 'set-test' tests the significance of variant sets. Requires permutation; + can be customized with --set-p/--set-r2/--set-max. + * The 'genotypic' modifier adds an additive effect/dominance deviation 2df + joint test (0/1/2 and 0/1/0 coding), while 'hethom' uses 0/0/1 and 0/1/0 + coding instead. If permutation is also requested, these modifiers cause + permutation to be based on the joint test. + * 'dominant' and 'recessive' specify a model assuming full dominance or + recessiveness, respectively, for the A1 allele. + * 'no-snp' causes regression to be performed only on the phenotype and the + covariates, without reference to genomic data. If permutation is also + requested, results are reported for all covariates. + * 'hide-covar' removes covariate-specific lines from the report. + * By default, sex (male = 1, female = 0) is automatically added as a + covariate on X chromosome variants, and nowhere else. The 'sex' modifier + causes it to be added everywhere, while 'no-x-sex' excludes it. + * 'interaction' adds genotype x covariate interactions to the model. This + cannot be used with the usual permutation tests; use --tests to define + the permutation test statistic instead. + * 'intercept' causes intercepts to be included in the main report. + * For logistic regressions, the 'beta' modifier causes regression + coefficients instead of odds ratios to be reported. + * With --linear, the 'standard-beta' modifier standardizes the phenotype + and all predictors to zero mean and unit variance before regression. + + --dosage [allele dosage file] + + + --dosage [list file] list + + + --write-dosage + Process (possibly gzipped) text files with variant-major allelic dosage + data. 
This cannot be used with a regular input fileset; instead, you must + *only* specify a .fam and possibly a .map file, and you can't specify any + other commands. + * PLINK 2.0 will have first-class support for genotype probabilities. An + equivalent data import flag will be provided then, and --dosage will be + retired. + * By default, --dosage assumes that only one allelic dosage file should be + loaded. To specify multiple files, + 1. create a master list with one entry per line. There are normally two + supported formats for this list: just a filename per line, or variant + batch numbers in the first column and filenames in the second. + 2. Provide the name of that list as the first --dosage parameter. + 3. Add the 'list' modifier. + * By default, --dosage assumes the allelic dosage file(s) contain a header + line, which has 'SNP' in column i+1, 'A1' in column i+j+2, 'A2' in column + i+j+3, and sample FID/IIDs starting from column i+j+k+4. (i/j/k are + normally zero, but can be changed with 'skip0', 'skip1', and 'skip2' + respectively.) If such a header line is not present, + * when all samples appear in the same order as they do in the .fam file, + you can use the 'noheader' modifier. + * Otherwise, use the 'sepheader' modifier, and append sample ID filenames + to your 'list' file entries. + * The 'format' modifier lets you specify the number of values used to + represent each dosage. 'format=1' normally indicates a single 0..2 A1 + expected count; 'dose1' modifies this to a 0..1 frequency. 'format=2' + (the default) indicates a 0..1 homozygous A1 likelihood followed by a + 0..1 het likelihood, while 'format=3' indicates 0..1 hom A1, 0..1 het, + 0..1 hom A2. + * 'Zout' causes the output file to be gzipped. + * Normally, an association analysis is performed. 'standard-beta' and + 'sex' behave as they are supposed to with --linear/--logistic. + 'case-control-freqs' causes case and control allele frequencies to be + reported separately. 
+ * There are three alternate modes which cause the association analysis to + be skipped. + * 'occur' requests a simple variant occurrence report. + * --write-dosage causes a simple merged file matching the 'format' + specification (not including 'dose1') to be generated. + * --score applies a linear scoring system to the dosages. + + --lasso [h2 estimate] {min lambda} + Estimate variant effect sizes via LASSO regression. You must provide an + additive heritability estimate to calibrate the regression. + Note that this method may require a very large sample size (e.g. hundreds + of thousands) to be effective on complex polygenic traits. + + --test-missing + Check for association between missingness and case/control status, using + Fisher's exact test. The 'midp' modifier causes Lancaster's mid-p + adjustment to be applied. + + --make-perm-pheno [ct] + Generate phenotype permutations and write them to disk, without invoking an + association test. + + --tdt + + Report transmission disequilibrium test statistics, given case/control + phenotypes and pedigree information. + * A Mendel error check is performed before the main tests; offending + genotypes are treated as missing by this analysis. + * By default, the basic TDT p-value is based on a chi-square test unless + you request the exact binomial test with 'exact' or 'exact-midp'. + * 'perm'/'mperm=[value]' requests a family-based adaptive or max(T) + permutation test. By default, the permutation test statistic is the + basic TDT p-value; 'parentdt1'/'parentdt2' cause parenTDT or combined + test p-values, respectively, to be considered instead. + * 'set-test' tests the significance of variant sets. This cannot be used + with exact tests for now. + The 'poo' modifier causes a parent-of-origin analysis to be performed + instead, with transmissions from heterozygous fathers and heterozygous + mothers considered separately. + * The parent-of-origin analysis does not currently support exact tests. 
+ * By default, the permutation test statistic is the absolute + parent-of-origin test Z score; 'pat'/'mat' cause paternal or maternal TDT + chi-square statistics, respectively, to be considered instead. + + --qfam + --qfam-parents + --qfam-between + --qfam-total + QFAM family-based association test for quantitative traits. + * A Mendel error check is performed before the main tests; offending + genotypes are treated as missing by this analysis. + * This procedure requires permutation. 'perm' and 'perm-count' have the + usual meanings. However, 'mperm=[value]' just specifies a fixed number + of permutations; the method does not support a proper max(T) test. + * The 'emp-se' modifier adds BETA and EMP_SE (empirical standard error for + beta) fields to the .perm output file. + + --annotate [PLINK report] + + + Add annotations to a variant-based PLINK report. This requires an + annotation source: + * 'attrib=[file]' specifies a (possibly gzipped) attribute file. + * 'ranges=[file]' specifies a gene/range list file. + (Both source types can be specified simultaneously.) The following options + are also supported: + * 'filter=[file]' causes only variants within one of the ranges in the file + to be included in the new report. + * 'snps=[file]' causes only variants named in the file to be included in + the new report. + * The 'NA' modifier causes unannotated variants to have 'NA' instead of '.' + in the new report's ANNOT column, while the 'prune' modifier excludes + them entirely. + * The 'block' modifier replaces the single ANNOT column with a 0/1-coded + column for each possible annotation. + * With 'ranges', + * 'subset=[file]' causes only intervals named in the subset file to be + loaded from the ranges file. + * interval annotations normally come with a parenthesized signed distance + to the interval boundary (0 if the variant is located inside the + interval; this is always true without --border). They can be excluded + with the 'minimal' modifier. 
+ * the 'distance' modifier adds 'DIST' and 'SGN' columns describing signed + distance to the nearest interval. + * When --pfilter is present, high p-values are filtered out. + + --clump [PLINK report filename(s)...] + Process association analysis report(s) with 'SNP' and p-value columns, + organizing results by LD-based clumps. Multiple filenames can be separated + by spaces or commas. + + --gene-report [PLINK report] [gene range file] + Generate a gene-based report from a variant-based report. + * When --pfilter is present, high p-values are filtered out. + * When --extract (without 'range') is present, only variants named in the + --extract file are considered. + + --meta-analysis [PLINK report filenames...] + --meta-analysis [PLINK report filenames...] + + + Perform a meta-analysis on several variant-based reports with 'SNP' and + 'SE' fields. + * Normally, an 'OR' odds ratio field must also be present in each input + file. With 'logscale', 'BETA' log-odds values/regression coefficients + are expected instead, but the generated report will still contain odds + ratio estimates. With 'qt', both input and output values are regression + betas. + * 'CHR', 'BP', and 'A1' fields are also normally required. 'no-map' causes + them to all be ignored, while 'no-allele' causes just 'A1' to be ignored. + * If 'A2' fields are present, and neither 'no-map' nor 'no-allele' was + specified, A1/A2 allele flips are handled properly. Otherwise, A1 + mismatches are thrown out. + * 'study' causes study-specific effect estimates to be collated in the + meta-analysis report. + * 'report-all' causes variants present in only a single input file to be + included in the meta-analysis report. + * 'weighted-z' requests weighted Z-score-based p-values (as computed by the + Abecasis Lab's METAL software) in addition to the usual inverse + variance-based analysis. This requires P and effective sample size + fields. 
+ * When --extract (without 'range') is present, only variants named in the + --extract file are considered. + * Unless 'no-map' is specified, chromosome filters are also respected. + + --fast-epistasis + + --epistasis + Scan for epistatic interactions. --fast-epistasis inspects 3x3 joint + genotype count tables and only applies to case/control phenotypes, while + --epistasis performs linear or logistic regression. + * By default, --fast-epistasis uses the PLINK 1.07 allele-based test. Two + newer tests are now supported: 'boost' invokes the likelihood ratio test + introduced by Wan X et al. (2010) BOOST: A Fast Approach to Detecting + Gene-Gene Interactions in Genome-wide Case-Control Studies, while + 'joint-effects' applies the joint effects test introduced in Ueki M, + Cordell HJ (2012) Improved statistics for genome-wide interaction + analysis. + * The original --fast-epistasis test normally applies the variance and + empty cell corrections suggested by Ueki and Cordell's paper. To disable + them, use the 'no-ueki' modifier. + * 'case-only' requests a case-only instead of a case/control test. + * By default, all pairs of variants across the entire genome are tested. + To just test pairs of variants within a single set, add the 'set-by-set' + modifier and load exactly one set with --set/--make-set; with exactly two + sets loaded, all variants in one set are tested against all variants in + the other. 'set-by-all' tests all variants in one set against the entire + genome instead. + * 'nop' strips p-values from the main report. + * These computations can be subdivided with --parallel; however... + --epistasis-summary-merge [common file prefix] [ct] + When a --{fast-}epistasis job is subdivided with --parallel, the main + report can be assembled at the end by applying Unix 'cat' in the usual + manner, but the .summary.1, .summary.2, ... files may require a specialized + merge. --epistasis-summary-merge takes care of the latter. 
+ + --twolocus [variant ID] [variant ID] + Two-locus joint genotype count report. + + --score [filename] {i} {j} {k}
+ + Apply a linear scoring system to each sample. + The input file should have one line per scored variant. Variant IDs are + read from column #i, allele codes are read from column #j, and scores are + read from column #k, where i defaults to 1, j defaults to i+1, and k + defaults to j+1. + * The 'header' modifier causes the first nonempty line of the input file to + be ignored; otherwise, --score assumes there is no header line. + * By default, final scores are averages of the valid per-variant scores. + The 'sum' modifier causes sums to be reported instead. (This cannot be + used with 'no-mean-imputation'. And for backward compatibility, 'sum' is + automatically on with dosage data unless 'no-sum' is specified.) + * By default, copies of the unnamed allele contribute zero to score, while + missing genotypes contribute an amount proportional to the loaded (via + --read-freq) or imputed allele frequency. To throw out missing + observations instead (decreasing the denominator in the final average + when this happens), use the 'no-mean-imputation' modifier. + * Alternatively, you can use the 'center' modifier to shift all scores to + mean zero. + * This command can be used with dosage data. By default, the 'CNT' column + is omitted from the output file in this case; use 'include-cnt' to keep + it. Also, note that scores are multiplied by 0..1 dosages, not 0..2 + diploid allele counts, unless the 'double-dosage' modifier is present. + + --write-var-ranges [block ct] + Divide the set of variants into equal-size blocks. (Can be used with + --snps to split a job across multiple machines.) + +The following other flags are supported. (Order of operations is described at +https://www.cog-genomics.org/plink2/order .) + --script [fname] : Include command-line options from file. + --rerun {log} : Rerun commands in log (default 'plink.log'). + --version : Display only version number before exiting. + --silent : Suppress output to console. 
+ --gplink : Reserved for interoperation with gPLINK. + --missing-genotype [char] : Set missing genotype code (normally '0'). + --double-id : Set both FIDs and IIDs to the VCF/BCF sample ID. + --const-fid {ID} : Set all FIDs to the given constant (default '0'). + --id-delim {d} : Parse sample IDs as [FID][d][IID] (default delim '_'). + --vcf-idspace-to [c] : Convert spaces in sample IDs to the given character. + --biallelic-only : Skip VCF variants with 2+ alt. alleles. + --vcf-min-qual [val] : Skip VCF variants with low/missing QUAL. + --vcf-filter {exception(s)...} : Skip variants which have FILTER failures. + --vcf-require-gt : Skip variants with no GT field. + --vcf-min-gq [val] : No-call a genotype when GQ is below the + given threshold. + --vcf-min-gp [val] : No-call a genotype when 0-1 scaled GP is + below the given threshold. + --vcf-half-call [m] : Specify how '0/.' and similar VCF GT values should be + handled. The following four modes are supported: + * 'error'/'e' (default) errors out and reports line #. + * 'haploid'/'h' treats them as haploid calls. + * 'missing'/'m' treats them as missing. + * 'reference'/'r' treats the missing value as 0. + --oxford-single-chr [chr nm] : Specify single-chromosome .gen file with + ignorable first column. + --oxford-pheno-name [col nm] : Import named phenotype from the .sample file. + --hard-call-threshold [val] : When an Oxford-format fileset is loaded, calls + --hard-call-threshold random with uncertainty level greater than 0.1 are + normally treated as missing. You can adjust + this threshold by providing a numeric + parameter, or randomize all calls with + 'random'. + --missing-code {string list} : Comma-delimited list of missing phenotype + (alias: --missing_code) values for Oxford-format filesets (def. 'NA'). + --simulate-ncases [num] : Set --simulate case count (default 1000). + --simulate-ncontrols [n] : Set --simulate control count (default 1000). 
+ --simulate-prevalence [p] : Set --simulate disease prevalence (default 0.01). + --simulate-n [num] : Set --simulate-qt sample count (default 1000). + --simulate-label [prefix] : Set --simulate{-qt} FID/IID name prefix. + --simulate-missing [freq] : Set --simulate{-qt} missing genotype frequency. + --allow-extra-chr <0> : Permit unrecognized chromosome codes. The '0' + (alias: --aec) modifier causes them to be treated as if they had + been set to zero. + --chr-set [autosome ct] : + Specify a nonhuman chromosome set. The first parameter sets the number of + diploid autosome pairs if positive, or haploid chromosomes if negative. + Given diploid autosomes, the remaining modifiers indicate the absence of + the named non-autosomal chromosomes. + --cow/--dog/--horse/--mouse/--rice/--sheep : Shortcuts for those species. + --autosome-num [value] : Alias for '--chr-set [value] no-y no-xy no-mt'. + --cm-map [fname pattern] {chr} : Use SHAPEIT-format recombination maps to set + centimorgan positions. To process more than + one chromosome, include a '@' in the first + parameter where the chrom. number belongs, + e.g. 'genetic_map_chr@_combined_b37.txt'. + --zero-cms : Zero out centimorgan positions. + --pheno [fname] : Load phenotype data from the specified file, instead of + using the values in the main input fileset. + --all-pheno : For basic association tests, loop through all phenotypes + in --pheno file. + --mpheno [n] : Load phenotype from column (n+2) in --pheno file. + --pheno-name [c] : If --pheno file has a header row, use column with the + given name. + --pheno-merge : When the main input fileset contains a phenotype value + for a sample, but the --pheno file does not, use the + original value instead of treating the phenotype as + missing. + --missing-phenotype [v] : Set missing phenotype value (normally -9). + --1 : Expect case/control phenotypes to be coded as + 0 = control, 1 = case, instead of the usual + 0 = missing, 1 = control, 2 = case. 
+ --make-pheno [fn] [val] : Define a new case/control phenotype. If the val + parameter is '*', all samples listed in the given + file are cases, and everyone else is a control. + (Note that, in some shells, it is necessary to + surround the * with quotes.) + Otherwise, all samples with third column entry + equal to the val parameter are cases, and all other + samples mentioned in the file are controls. + --tail-pheno [Lt] {Hbt} : Downcode a scalar phenotype to a case/control + phenotype. All samples with phenotype values + greater than Hbt are cases, and all with values + less than or equal to Lt are controls. If Hbt is + unspecified, it is equal to Lt; otherwise, + in-between phenotype values are set to missing. + --covar [filename] : Specify covariate file. + --covar-name [...] : Specify covariate(s) in --covar file by name. + Separate multiple names with spaces or commas, and + use dashes to designate ranges. + --covar-number [...] : Specify covariate(s) in --covar file by index. + --no-const-covar : Exclude constant covariates. + --within [f] : Specify initial cluster assignments. + --mwithin [n] : Load cluster assignments from column n+2. + --family : Create a cluster for each family ID. + --loop-assoc [f] : Run specified case/control association + commands once for each cluster in the file, + using cluster membership as the phenotype. + --set [filename] : Load sets from a .set file. + --set-names [name(s)...] : Load only sets named on the command line. + Use spaces to separate multiple names. + --subset [filename] : Load only sets named in the given text file. + --set-collapse-all [set name] : Merge all sets. + --complement-sets : Invert all sets. (Names gain 'C_' prefixes.) + --make-set-complement-all [s] : --set-collapse-all + inversion. + --make-set [filename] : Define sets from a list of named bp ranges. + --make-set-border [kbs] : Stretch regions in --make-set file. 
+ --make-set-collapse-group : Define sets from groups instead of sets in + --make-set file. + --keep [filename] : Exclude all samples not named in the file. + --remove [filename] : Exclude all samples named in the file. + --keep-fam [filename] : Exclude all families not named in the file. + --remove-fam [fname] : Exclude all families named in the file. + --extract [f] : Exclude all variants not named in the file. + --exclude [f] : Exclude all variants named in the file. + --keep-clusters [filename] : These can be used individually or in + --keep-cluster-names [name(s)...] combination to define a list of + clusters to keep; all samples not in a + cluster in that list are then excluded. + Use spaces to separate cluster names + for --keep-cluster-names. + --remove-clusters [filename] : Exclude all clusters named in the file. + --remove-cluster-names [name(s)...] : Exclude the named clusters. + --gene [sets...] : Exclude variants not in a set named on the command line. + (Separate multiple set names with spaces.) + --gene-all : Exclude variants which aren't a member of any set. (PLINK + 1.07 automatically did this under some circumstances.) + --attrib [f] {att lst} : Given a file assigning attributes to variants, and a + --attrib-indiv [f] {a} comma-delimited list (with no whitespace) of + attribute names, remove variants/samples which are + either missing from the file or don't have any of + the listed attributes. If some attribute names in + the list are preceded by '-', they are treated as + 'negative match conditions' instead: variants with + at least one negative match attribute are removed. + The first character in the list cannot be a '-', due + to how command-line parsing works; add a comma in + front to get around this. + --chr [chrs...] : Exclude all variants not on the given chromosome(s). + Valid choices for humans are 0 (unplaced), 1-22, X, Y, XY, + and MT. 
Separate multiple chromosomes with spaces and/or + commas, and use a dash (no adjacent spaces permitted) to + denote a range, e.g. '--chr 1-4, 22, xy'. + --not-chr [...] : Reverse of --chr (exclude variants on listed chromosomes). + --autosome : Exclude all non-autosomal variants. + --autosome-xy : Exclude all non-autosomal variants, except those with + chromosome code XY (pseudo-autosomal region of X). + --snps-only : Exclude non-SNP variants. By default, SNP = both + allele codes are single-character; 'just-acgt' + restricts SNP codes to {A,C,G,T,a,c,g,t,[missing]}. + --from [var ID] : Use ID(s) to specify a variant range to load. When used + --to [var ID] together, both variants must be on the same chromosome. + --snp [var ID] : Specify a single variant to load. + --exclude-snp [var ID] : Specify a single variant to exclude. + --window [kbs] : With --snp or --exclude-snp, loads/excludes all variants + within half the specified kb distance of the named one. + --from-bp [pos] : Use physical position(s) to define a variant range to + --to-bp [pos] load. --from-kb/--to-kb/--from-mb/--to-mb allow decimal + --from-kb [pos] values. You must also specify a single chromosome (using + --to-kb [pos] e.g. --chr) when using these flags. + --from-mb [pos] + --to-mb [pos] + --snps [var IDs...] : Use IDs to specify variant range(s) to load or + --exclude-snps [...] exclude. E.g. '--snps rs1111-rs2222, rs3333, rs4444'. + --thin [p] : Randomly remove variants, retaining each with prob. p. + --thin-count [n] : Randomly remove variants until n of them remain. + --bp-space [bps] : Remove variants so that each pair is no closer than the + given bp distance. (Equivalent to VCFtools --thin.) + --thin-indiv [p] : Randomly remove samples, retaining with prob. p. + --thin-indiv-count [n] : Randomly remove samples until n of them remain. + --filter [f] [val(s)...] : Exclude all samples without a 3rd column entry in + the given file matching one of the given + space-separated value(s). 
+ --mfilter [n] : Match against (n+2)th column instead. + --geno {val} : Exclude variants with missing call frequencies greater + than a threshold (default 0.1). (Note that the default + threshold is only applied if --geno is invoked without a + parameter; when --geno is not invoked, no per-variant + missing call frequency ceiling is enforced at all. Other + inclusion/exclusion default thresholds work the same way.) + --mind {val} : Exclude samples with missing call frequencies greater than + a threshold (default 0.1). + --oblig-missing [f1] [f2] : Specify blocks of missing genotype calls for + --geno/--mind to ignore. The first file should + have variant IDs in the first column and block + IDs in the second, while the second file should + have FIDs in the first column, IIDs in the + second, and block IDs in the third. + --prune : Remove samples with missing phenotypes. + --maf {freq} : Exclude variants with minor allele frequency lower than + a threshold (default 0.01). + --max-maf [freq] : Exclude variants with MAF greater than the threshold. + --mac [ct] : Exclude variants with minor allele count lower than the + (alias: --min-ac) given threshold. + --max-mac [ct] : Exclude variants with minor allele count greater than + (alias: --max-ac) the given threshold. + --maf-succ : Rule of succession MAF estimation (used in EIGENSOFT). + Given j observations of one allele and k >= j observations + of the other, infer a MAF of (j+1) / (j+k+2), rather than + the default j / (j+k). + --read-freq [fn] : Estimate MAFs and heterozygote frequencies from the given + --freq{x} report, instead of the input fileset. + --hwe [p] : Exclude variants with Hardy-Weinberg + equilibrium exact test p-values below a + threshold. + --me [t] [v] : Filter out trios and variants with Mendel error + rates exceeding the given thresholds. + --me-exclude-one {ratio} : Make --me exclude only one sample per trio. 
+ --qual-scores [f] {qcol} {IDcol} {skip} : Filter out variants with + out-of-range quality scores. + Default range is now [0, \infty ). + --qual-threshold [min qual score] : Set --qual-scores range floor. + --qual-max-threshold [max qual score] : Set --qual-scores range ceiling. + --allow-no-sex : Do not treat ambiguous-sex samples as having missing + phenotypes in analysis commands. (Automatic w/ --no-sex.) + --must-have-sex : Force ambiguous-sex phenotypes to missing on + --make-bed/--make-just-fam/--recode/--write-covar. + --filter-cases : Include only cases in the current analysis. + --filter-controls : Include only controls. + --filter-males : Include only males. + --filter-females : Include only females. + --filter-founders : Include only founders. + --filter-nonfounders : Include only nonfounders. + --nonfounders : Include nonfounders in allele freq/HWE calculations. + --make-founders : Clear parental IDs for those + with 1+ missing parent(s). + --recode-allele [fn] : With --recode A/A-transpose/AD, count alleles named in + the file (otherwise A1 alleles are always counted). + --output-chr [MT code] : Set chromosome coding scheme in output files by + providing the desired human mitochondrial code. + (Options are '26', 'M', 'MT', '0M', 'chr26', 'chrM', + and 'chrMT'.) + --output-missing-genotype [ch] : Set the code used to represent missing + genotypes in output files (normally the + --missing-genotype value). + --output-missing-phenotype [s] : Set the string used to represent missing + phenotypes in output files (normally the + --missing-phenotype value). + --zero-cluster [f] : In combination with --within/--family, set blocks of + genotype calls to missing. The input file should have + variant IDs in the first column and cluster IDs in the + second. This must now be used with --make-bed and no + other output commands. + --set-hh-missing : Cause --make-bed and --recode to set heterozygous + haploid genotypes to missing. 
+ --set-mixed-mt-missing : Cause --make-bed and --recode to set mixed MT + genotypes to missing. + --split-x [bp1] [bp2] : Changes chromosome code of all X chromosome + --split-x [build] variants with bp position <= bp1 or >= bp2 + to XY. The following build codes are + supported as shorthand: + * 'b36'/'hg18' = NCBI 36, 2709521/154584237 + * 'b37'/'hg19' = GRCh37, 2699520/154931044 + * 'b38'/'hg38' = GRCh38, 2781479/155701383 + By default, PLINK errors out when no + variants would be affected by --split-x; + the 'no-fail' modifier (useful in scripts) + overrides this. + --merge-x : Merge XY chromosome back with X. + --set-me-missing : Cause --make-bed to set Mendel errors to missing. + --fill-missing-a2 : Cause --make-bed to replace all missing calls with + homozygous A2 calls. + --set-missing-var-ids [t] : Given a template string with a '@' where the + chromosome code should go and '#' where the bp + coordinate belongs, --set-missing-var-ids + assigns chromosome-and-bp-based IDs to unnamed + variants. + You may also use '$1' and '$2' to refer to + allele names in the template string, and in + fact this becomes essential when multiple + variants share the same coordinate. + --new-id-max-allele-len [n] : Specify maximum number of leading characters + from allele names to include in new variant IDs + (default 23). + --missing-var-code [string] : Change unnamed variant code (default '.'). + --update-chr [f] {chrcol} {IDcol} {skip} : Update variant chromosome codes. + --update-cm [f] {cmcol} {IDcol} {skip} : Update centimorgan positions. + --update-map [f] {bpcol} {IDcol} {skip} : Update variant bp positions. + --update-name [f] {newcol} {oldcol} {skip} : Update variant IDs. + --update-alleles [fname] : Update variant allele codes. + --allele1234 : Interpret/recode A/C/G/T alleles as 1/2/3/4. + With 'multichar', converts all A/C/G/Ts in allele + names to 1/2/3/4s. + --alleleACGT : Reverse of --allele1234. + --update-ids [f] : Update sample IDs. 
+ --update-parents [f] : Update parental IDs. + --update-sex [f] {n} : Update sexes. Sex (1 or M = male, 2 or F = female, 0 + = missing) is loaded from column n+2 (default n is 1). + --flip [filename] : Flip alleles (A<->T, C<->G) for SNP IDs in the file. + --flip-subset [fn] : Only apply --flip to samples in --flip-subset file. + --flip-scan-window [ct+1] : Set --flip-scan max variant ct dist. (def. 10). + --flip-scan-window-kb [x] : Set --flip-scan max kb distance (default 1000). + --flip-scan-threshold [x] : Set --flip-scan min correlation (default 0.5). + --keep-allele-order : Keep the allele order defined in the .bim file, + --real-ref-alleles instead of forcing A2 to be the major allele. + --real-ref-alleles also removes 'PR' from the INFO + values emitted by --recode vcf{-fid/-iid}. + --a1-allele [f] {a1col} {IDcol} {skip} : Force alleles in the file to A1. + --a2-allele [filename] {a2col} {IDcol} {skip} : + Force alleles in the file to A2. ("--a2-allele [VCF filename] 4 3 '#'", + which scrapes reference allele assignments from a VCF file, is especially + useful.) + --indiv-sort [m] {f} : Specify FID/IID sort order. The following four modes + are supported: + * 'none'/'0' keeps samples in the order they were + loaded. Default for non-merge operations. + * 'natural'/'n' invokes 'natural sort', e.g. + 'id2' < 'ID3' < 'id10'. Default when merging. + * 'ascii'/'a' sorts in ASCII order, e.g. + 'ID3' < 'id10' < 'id2'. + * 'file'/'f' uses the order in the given file (named + in the second parameter). + For now, only --merge/--bmerge/--merge-list and + --make-bed/--make-just-fam respect this flag. + --with-phenotype : Include more sample info + in new .cov file. + --dummy-coding {N} : Split categorical variables (n categories, + 2 < n <= N, default N is 49) into n-1 binary + dummy variables when writing covariate file. + --merge-mode [n] : Adjust --{b}merge/--merge-list behavior based on a + numeric code. 
+ 1 (default) = ignore missing calls, otherwise difference + -> missing + 2 = only overwrite originally missing calls + 3 = only overwrite when nonmissing in new file + 4/5 = never overwrite and always overwrite, respectively + 6 = report all mismatching calls without merging + 7 = report mismatching nonmissing calls without merging + --merge-equal-pos : With --merge/--bmerge/--merge-list, merge variants with + different names but identical positions. (Exception: + same-position chromosome code 0 variants aren't merged.) + --mendel-duos : Make Mendel error checks consider samples with only one + parent in the dataset. + --mendel-multigen : Make Mendel error checks consider (great-)grandparental + genotypes when parental genotype data is missing. + --ld-window [ct+1] : Set --r/--r2 max variant ct pairwise distance (usu. 10). + --ld-window-kb [x] : Set --r/--r2 max kb pairwise distance (usually 1000). + --ld-window-cm [x] : Set --r/--r2 max centimorgan pairwise distance. + --ld-window-r2 [x] : Set threshold for --r2 report inclusion (usually 0.2). + --ld-snp [var ID] : Set first variant in all --r/--r2 pairs. + --ld-snps [vID...] : Restrict first --r/--r2 variant to the given ranges. + --ld-snp-list [f] : Restrict first --r/--r2 var. to those named in the file. + --list-all : Generate the 'all' mode report when using --show-tags in + file mode. + --tag-kb [kbs] : Set --show-tags max tag kb distance (default 250). + --tag-r2 [val] : Set --show-tags min tag r-squared (default 0.8). + --tag-mode2 : Use two-column --show-tags (file mode) I/O format. + --ld-xchr [code] : Set Xchr model for --indep{-pairwise}, --r/--r2, + --flip-scan, and --show-tags. + 1 (default) = males coded 0/1, females 0/1/2 (A1 dosage) + 2 = males coded 0/2 + 3 = males coded 0/2, but females given double weighting + --blocks-max-kb [kbs] : Set --blocks maximum haploblock span (def. 200). + --blocks-min-maf [cutoff] : Adjust --blocks MAF minimum (default 0.05). 
+ --blocks-strong-lowci [x] : Set --blocks 'strong LD' CI thresholds (defaults + --blocks-strong-highci [x] 0.70 and 0.98). + --blocks-recomb-highci [x] : Set 'recombination' CI threshold (default 0.90). + --blocks-inform-frac [x] : Force haploblock [strong LD pairs]:[total + informative pairs] ratios to be larger than this + value (default 0.95). + --distance-wts exp=[x] : When computing genomic distances, assign each + variant a weight of (2q(1-q))^{-x}, where q + is the loaded or inferred MAF. + --read-dists [dist file] {id file} : Load a triangular binary distance matrix + instead of recalculating from scratch. + --ppc-gap [val] : Minimum number of base pairs, in thousands, between + informative pairs of markers used in --genome PPC test. + 500 if unspecified. + --min [cutoff] : Specify minimum PI_HAT for inclusion in --genome report. + --max [cutoff] : Specify maximum PI_HAT for inclusion in --genome report. + --homozyg-match [] : Set minimum concordance across jointly homozygous + variants for a pairwise allelic match to be declared. + --pool-size [ct] : Set minimum size of pools in '--homozyg group' report. + --read-genome [fn] : Load --genome report for --cluster/--neighbour, instead + of recalculating IBS and PPC test p-values from scratch. + --ppc [p-val] : Specify minimum PPC test p-value within a cluster. + --mc [max size] : Specify maximum cluster size. + --mcc [c1] [c2] : Specify maximum case and control counts per cluster. + --K [min count] : Specify minimum cluster count. + --ibm [val] : Specify minimum identity-by-missingness. + --match [f] {mv} : Use covariate values to restrict clustering. Without + --match-type, two samples can only be in the same cluster + if all covariates match. The optional second parameter + specifies a covariate value to treat as missing. + --match-type [f] : Refine interpretation of --match file. 
The --match-type + file is expected to be a single line with as many entries + as the --match file has covariates; '0' entries specify + 'negative matches' (i.e. samples with equal covariate + values cannot be in the same cluster), '1' entries specify + 'positive matches' (default), and '-1' causes the + corresponding covariate to be ignored. + --qmatch [f] {m} : Force all members of a cluster to have similar + --qt [fname] quantitative covariate values. The --qmatch file contains + the covariate values, while the --qt file is a list of + nonnegative tolerances (and '-1's marking covariates to + skip). + --pca-cluster-names [...] : These can be used individually or in combination + --pca-clusters [fname] to define a list of clusters to use in the basic + --pca computation. (--pca-cluster-names expects + a space-delimited sequence of cluster names, + while --pca-clusters expects a file with one + cluster name per line.) All samples outside + those clusters will then be projected on to the + calculated PCs. + --mds-plot [dims] : + Multidimensional scaling analysis. Requires --cluster. + --cell [thresh] : Skip some --model tests when a contingency table entry is + smaller than the given threshold. + --condition [var ID] : Add one variant as a --linear + or --logistic covariate. + --condition-list [f] : Add variants named in the file + as --linear/--logistic covs. + --parameters [...] : Include only the given covariates/interactions in the + --linear/--logistic models, identified by a list of + 1-based indices and/or ranges of them. + --tests {...} : Perform a (joint) test on the specified term(s) in the + --linear/--logistic model, identified by 1-based + indices and/or ranges of them. If permutation was + requested, it is based on this test. + * Note that, when --parameters is also present, the + indices refer to the terms remaining AFTER pruning by + --parameters. + * You can use '--tests all' to include all terms. 
+ --vif [max VIF] : Set VIF threshold for --linear multicollinearity check + (default 50). + --xchr-model [code] : Set the X chromosome --linear/--logistic model. + 0 = skip sex and haploid chromosomes + 1 (default) = add sex as a covariate on X chromosome + 2 = code male genotypes 0/2 instead of 0/1 + 3 = test for interaction between genotype and sex + --lasso-select-covars {cov(s)...} : Subject some or all covariates to LASSO + model selection. + --adjust : Report some multiple-testing corrections. + --lambda [val] : Set genomic control lambda for --adjust. + --ci [size] : Report confidence intervals for odds ratios. + --pfilter [val] : Filter out association test results with higher p-values. + --aperm [min perms - 1] {max perms} {alpha} {beta} {init interval} {slope} : + Set up to six parameters controlling adaptive permutation tests. + * The first two control the minimum and maximum number of permutations that + may be run for each variant; default values are 5 and 1000000. + * The next two control the early termination condition. A + 100% * (1 - beta/2T) confidence interval is calculated for each empirical + p-value, where T is the total number of variants; whenever this + confidence interval doesn't contain alpha, the variant is exempted from + further permutation testing. Default values are 0 and 1e-4. + * The last two control when the early termination condition is checked. If + a check occurs at permutation #p, the next check occurs after + [slope]p + [init interval] more permutations (rounded down). Default + initial interval is 1, and default slope is 0.001. + --mperm-save : Save best max(T) permutation test statistics. + --mperm-save-all : Save all max(T) permutation test statistics. + --set-p [p-val] : Adjust set test significant variant p-value ceiling + (default 0.05). + --set-r2 {v} : Adjust set test significant variant pairwise r^2 + ceiling (default 0.5). 'write' causes violating + pairs to be dumped to {output prefix}.ldset. 
+ --set-max [ct] : Adjust set test maximum # of significant variants + considered per set (default 5). + --set-test-lambda [v] : Specify genomic control correction for set test. + --border [kbs] : Extend --annotate range intervals by given # kbs. + --annotate-snp-field [nm] : Set --annotate variant ID field name. + --clump-p1 [pval] : Set --clump index var. p-value ceiling (default 1e-4). + --clump-p2 [pval] : Set --clump secondary p-value threshold (default 0.01). + --clump-r2 [r^2] : Set --clump r^2 threshold (default 0.5). + --clump-kb [kbs] : Set --clump kb radius (default 250). + --clump-snp-field [n...] : Set --clump variant ID field name (default + 'SNP'). With multiple field names, earlier names + take precedence over later ones. + --clump-field [name...] : Set --clump p-value field name (default 'P'). + --clump-allow-overlap : Let --clump non-index vars. join multiple clumps. + --clump-verbose : Request extended --clump report. + --clump-annotate [hdr...] : Include named extra fields in --clump-verbose and + --clump-best reports. (Field names can be + separated with spaces or commas.) + --clump-range [filename] : Report overlaps between clumps and regions. + --clump-range-border [kb] : Stretch regions in --clump-range file. + --clump-index-first : Extract --clump index vars. from only first file. + --clump-replicate : Exclude clumps which contain secondary results + from only one file. + --clump-best : Report best proxy for each --clump index var. + --meta-analysis-snp-field [n...] : Set --meta-analysis variant ID, A1/A2 + --meta-analysis-a1-field [n...] allele, p-value, and/or effective sample + --meta-analysis-a2-field [n...] size field names. Defaults are 'SNP', + --meta-analysis-p-field [n...] 'A1', 'A2', 'P', and 'NMISS', + --meta-analysis-ess-field [n...] respectively. When multiple parameters + are given to these flags, earlier names + take precedence over later ones. 
+ Note that, if the numbers of cases and + controls are unequal, effective sample + size should be + 4 / (1/[# cases] + 1/[# controls]). + --meta-analysis-report-dups : When a variant appears multiple times in + the same file, report that. + --gene-list-border [kbs] : Extend --gene-report regions by given # of kbs. + --gene-subset [filename] : Specify gene name subset for --gene-report. + --gene-report-snp-field [] : Set --gene-report variant ID field name (default + 'SNP'). Only relevant with --extract. + --gap [kbs] : Set '--fast-epistasis case-only' min. gap (default 1000). + --epi1 [p-value] : Set --{fast-}epistasis reporting threshold (default + 5e-6 for 'boost', 1e-4 otherwise). + --epi2 [p-value] : Set threshold for contributing to SIG_E count (def. 0.01). + --je-cellmin [n] : Set required number of observations per 3x3x2 contingency + table cell for joint-effects test (default 5). + --q-score-range [range file] [data file] {i} {j}
: + Apply --score to subset(s) of variants in the primary score list based + on e.g. p-value ranges. + * The first file should have range labels in the first column, p-value + lower bounds in the second column, and upper bounds in the third column. + Lines with too few entries, or nonnumeric values in the second or third + column, are ignored. + * The second file should contain a variant ID and a p-value on each + nonempty line (except possibly the first). Variant IDs are read from + column #i and p-values are read from column #j, where i defaults to 1 and + j defaults to i+1. The 'header' modifier causes the first nonempty line + of this file to be skipped. + --parallel [k] [n] : Divide the output matrix into n pieces, and only compute + the kth piece. The primary output file will have the + piece number included in its name, e.g. plink.rel.13 or + plink.rel.13.gz if k is 13. Concatenating these files + in order will yield the full matrix of interest. (Yes, + this can be done before unzipping.) + N.B. This generally cannot be used to directly write a + symmetric square matrix. Choose square0 or triangle + shape instead, and postprocess as necessary. + --memory [val] : Set size, in MB, of initial workspace malloc attempt. + (Practically mandatory when using GNU parallel.) + --threads [val] : Set maximum number of concurrent threads. + This has one known limitation: some BLAS/LAPACK linear + algebra operations are multithreaded in a way that PLINK + cannot control. If this is problematic, you should + recompile against single-threaded BLAS/LAPACK. + --d [char] : Change variant/covariate range delimiter (normally '-'). + --seed [val...] : Set random number seed(s). Each value must be an + integer between 0 and 4294967295 inclusive. + --perm-batch-size [val] : Set number of permutations per batch for some + permutation tests. + --output-min-p [p] : Specify minimum p-value to write to reports. + --debug : Use slower, more crash-resistant logging method. 
+ +Primary methods paper: +Chang CC, Chow CC, Tellier LCAM, Vattikuti S, Purcell SM, Lee JJ (2015) +Second-generation PLINK: rising to the challenge of larger and richer datasets. +GigaScience, 4. + +For further documentation and support, consult the main webpage +(https://www.cog-genomics.org/plink2 ) and/or the mailing list +(https://groups.google.com/d/forum/plink2-users ). + + ]]> + + 10.1186/s13742-015-0047-8 + @ARTICLE{Blankenberg19-anvio, + author = {Daniel Blankenberg, et al}, + title = {In preparation..}, + } + +