xaxt = "n", yaxt = "n", bty = "n") draw.circle(0,0,1,col = wes_palette("Zissou")[2]) arrows(0,0,.95,0,col = wes_palette("Zissou")[3], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Zissou")[4], lwd = 3) wes_palettes draw.circle(0,0,1,col = wes_palette("Royal1")[2]) draw.circle(0,0,1,col = wes_palette("Royal1")[1]) draw.circle(0,0,1,col = wes_palette("Royal1")[3]) draw.circle(0,0,1,col = wes_palette("Royal1")[4]) draw.circle(0,0,1,col = wes_palette("Royal1")[5]) draw.circle(0,0,1,col = wes_palette("Royal1")[6]) draw.circle(0,0,1,col = wes_palette("Royal2")[1]) arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Roayl2")[5], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Roayl2")[4], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Roayl2")[2], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal2")[2], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal2")[4], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal2")[5], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal2")[6], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal2")[2], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal1")[2], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal1")[1], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3) draw.circle(0,0,1,col = wes_palette("Royal2")[1], border = NULL) arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3) ?draw.circle draw.circle(0,0,1,col = wes_palette("Royal2")[1], border = wes_palette("Royal2")[1]) arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3) draw.circle(0,0,1,col = wes_palette("Royal2")[1]) arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3) arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3) X <- matrix(c(1,.5,0,1), byrow = T, nrow = 2) X X <- matrix(c(1,.5,0,1), byrow = T, nrow = 2) X ?draw.ellipse draw.ellipse(X) draw.ellipse(X[,1], X[,2]) draw.ellipse(x=0,y=0, a = X[,1], b = X[,2]) plot(0,0,col=NA, xlab = "", ylab = "", xlim = c(-1.5,1.5), ylim = c(-1.5,1.5), xaxt = "n", yaxt = "n", bty = "n") draw.ellipse(x=0,y=0, a = X[,1], b = X[,2]) t(X) %*% X draw.ellipse(x=1,y=1.25, a = sqrt(.5), b = sqrt(.5)) plot(c(0,10), c(0,10), type="n", main="test draw.ellipse") draw.ellipse(c(3,7), c(8,8), c(0.5,1), c(1,0.5), col=c(2,4), angle=c(45,0), segment=rbind(c(0,45),c(45,360))) plot(c(0,10), c(0,10), type="n", main="test draw.ellipse") draw.ellipse(c(3,7), c(8,8), c(0.5,1), c(1,0.5), col=c(2,4), angle=c(45,0), segment=rbind(c(0,45),c(45,360))) ?svd hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") } X <- hilbert(9)[, 1:6] X image(X) (s <- svd(X)) D <- diag(s$d) s$u %*% D %*% t(s$v) # X = U D V' t(s$u) %*% X %*% s$v # D = U' X V dim(X) str(s) dim(X) (s <- svd(X, nu=3, nv = 3)) D <- diag(s$d) s$u %*% D %*% t(s$v) # X = U D V' str(s) u %*% t(v) s$u %*% t(s$v) hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") } hilbert(2) outer(0:1,1:2,"+") 1/outer(0:1,1:2,"+") library(tcR) .05*2/.005 .05*.005 .05*.005/2 .5*2/.005 .5*.005/2 .55*.005/2 .5*.004 .5*.004 .5*.005 .55*.005 1.05*.005 .00275-.0025 install.packages("ineq") 549.17/5 549.17/5*4 549.17-109.834 (549.17-109.834)/4 440.26/2 164.68+263.44 (164.68+263.44)/2 109.84+200+214.06 109.84+200+214.06-70 source("https://bioconductor.org/biocLite.R") biocLite("powerTCR") setwd("~/Box Sync/School/research - Qunhua/Project_1/tcr_package/powerTCR") ##---------------------------------------------------------------------------------- ## Density, distribution, and quantile functions, random number generation ## for discrete truncated gamma and discrete gpd ##---------------------------------------------------------------------------------- ddiscgamma <- function(x, shape, rate, thresh, phiu, shift = 0, log = FALSE){ if(any(x != floor(x))){ stop("x must be an integer") } out <- rep(0, length(x)) up <- pgamma(x+1-shift, shape=shape, rate=rate) down <- pgamma(x-shift, shape=shape, rate=rate) if(!log){ b <- pgamma(thresh-shift, shape=shape, rate=rate) out[x < thresh] <- ((1-phiu)*(up-down)/b)[x < thresh] } else{ b <- pgamma(thresh-shift, shape=shape, rate=rate, log = TRUE) out[x < thresh] <- (log(1-phiu)+log(up-down) - b)[x < thresh] } out } pdiscgamma <- function(q, shape, rate, thresh, phiu, shift = 0){ probs <- ddiscgamma(0:q, shape, rate, thresh, phiu, shift) sum(probs) } qdiscgamma <- function(p, shape, rate, thresh, phiu, shift = 0){ qtrunc(p/(1-phiu), spec = "gamma", a=0, b=thresh-shift, shape=shape,rate=rate) %>% floor + shift } rdiscgamma <- function(n, shape, rate, thresh, shift = 0){ rtrunc(n, spec = "gamma", a=0, b=thresh-shift, shape=shape, rate=rate) %>% floor + shift } ddiscgpd <- function(x, thresh, sigma, xi, phiu, log = FALSE){ up <- evmix::pgpd(x+1, u=thresh, sigmau=sigma, xi=xi) down <- evmix::pgpd(x, u=thresh, sigmau=sigma, xi=xi) if(!log){ phiu*(up-down) } else{ log(phiu) + log(up-down) } } pdiscgpd <- function(q, thresh, sigma, xi, phiu){ probs <- ddiscgpd(thresh:q, thresh, sigma, xi, phiu) sum(probs) } qdiscgpd <- function(p, thresh, sigma, xi, phiu){ evmix::qgpd(p/phiu, u=thresh, sigmau = sigma, xi=xi) %>% floor } rdiscgpd <- function(n, thresh, sigma, xi){ evmix::rgpd(n, u=thresh, sigmau=sigma, xi=xi) %>% floor } ##---------------------------------------------------------------------------------- ## negative log likelihood and parameter estimation functions ## for discrete truncated gamma and discrete gpd ##---------------------------------------------------------------------------------- discgammanll <- function(param, dat, thresh, phiu, shift=0){ shape <- exp(param[1]) rate <- exp(param[2]) if(any(dat > thresh-1)){ warning("data must be less than the threshold") } ll <- log(ddiscgamma(dat, shape, rate, thresh, phiu, shift)) sum(-ll) } discgpdnll <- function(param, dat, thresh, phiu){ sigma <- exp(param[1]) xi <- param[2] ll <- log(ddiscgpd(dat, thresh, sigma, xi, phiu)) sum(-ll) } fdiscgamma <- function(param, dat, thresh, phiu, shift = 0, method, ...){ opt <- optim(log(param), discgammanll, dat=dat, thresh=thresh, phiu=phiu, shift=shift, method=method, hessian = TRUE, ...) opt } fdiscgpd <- function(param, dat, thresh, phiu, method, ...){ opt <- optim(c(log(param[1]),param[2]), discgpdnll, dat=dat, thresh=thresh, phiu=phiu, method=method, hessian = TRUE, ...) opt } ##----------------------------------------------------------------------------------------------------- ## Fitting function, random number generation, quantile function, density, and distribution functions ##----------------------------------------------------------------------------------------------------- fdiscgammagpd <- function(x, useq, shift = NULL, pvector=NULL, std.err = TRUE, method = "Nelder-Mead", ...){ if(!is(x, "numeric")){ stop("x must be numeric.") } if(!is.null(shift)){ if(!is(shift, "numeric")){ stop("shift must be numeric.") } if(shift != round(shift)){ stop("shift must be an integer.") } } if(!is(useq, "numeric")){ stop("useq must be numeric.") } if(any(x != round(x))){ stop("all elements in x must be integers.") } if(any(useq != round(useq))){ stop("all elements in useq must be integers.") } if(!is.null(pvector) & !(length(pvector) == 5)){ stop("pvector must contain 5 elements.") } if(!(is.logical(std.err))){ stop("std.err must be TRUE or FALSE.") } if(!(method %in% c("Nelder-Mead","BFGS", "CG", "L-BFGS-B", "SANN", "Brent"))){ stop("invalid method supplied.") } if(is.null(shift)){ shift <- min(x) } if (is.null(pvector)) { pvector <- rep(NA,5) s <- log(mean(x+0.5))-mean(log(x+0.5)) k <- (3-s + sqrt((s-3)^2 + 24*s))/12/s pvector[1] <- k pvector[2] <- k/mean(x) pvector[3] <- as.vector(quantile(x, 0.9)) xu <- x[x>=pvector[3]] initfgpd <- evmix::fgpd(xu, min(xu)-10^(-5)) pvector[4] <- initfgpd$mle[1] pvector[5] <- initfgpd$mle[2] } bulk <- lapply(1:length(useq), function(idx,x,useq) x < useq[idx], x=x, useq=useq) tail <- lapply(1:length(useq), function(idx,x,useq) x >= useq[idx], x=x, useq=useq) phiu <- lapply(1:length(useq), function(idx,tail) mean(tail[[idx]]), tail=tail) gammfit <- list() gpdfit <- list() nllhu <- rep(NA, length(useq)) for(i in 1:length(useq)){ gammfit[[i]] <- tryCatch(expr = fdiscgamma(pvector[1:2],x[bulk[[i]]], useq[i], phiu[[i]], shift, method=method), error = function(err) message("gamma part could not be fit at a specified threshold.") NA) gpdfit[[i]] <- tryCatch(expr = fdiscgpd(pvector[4:5], x[tail[[i]]], useq[i], phiu[[i]], method=method), error = function(err) { pvec3 <- as.vector(quantile(x,1-phiu[[i]])) xu <- x[x>=pvec3] initfgpd.adj <- evmix::fgpd(x, min(xu)-10^(-5)) pvec4 <- initfgpd.adj$mle[1] pvec5 <- initfgpd.adj$mle[2] tryCatch(expr = fdiscgpd(c(pvec4,pvec5), x[tail[[i]]], useq[i], phiu[[i]], method=method), error = function(err2) message("GPD part could not be fit at a specified threshold.") NA) }) nllhu[i] <- tryCatch(expr = gammfit[[i]]$value + gpdfit[[i]]$value, error = function(err) NA) } bestfit <- which.min(nllhu) fit.out <- list(gammfit[[bestfit]], gpdfit[[bestfit]]) names(fit.out) <- c("bulk","tail") mle <- c(mean(x >= useq[bestfit]), exp(fit.out$bulk$par), useq[bestfit], exp(fit.out$tail$par[1]), fit.out$tail$par[2]) names(mle) <- c("phi","shape","rate","thresh","sigma","xi") if(std.err){ H <- fit.out$bulk$hessian %>% rbind(matrix(rep(0,4),nrow = 2)) %>% cbind(rbind(matrix(rep(0,4),nrow = 2),fit.out$tail$hessian)) fisherInf <- tryCatch(expr = solve(H), error = function(err) NA) out <- list(x = as.vector(x), init = as.vector(pvector), useq = useq, nllhuseq = nllhu, optim = fit.out, nllh = nllhu[bestfit], mle=mle, fisherInformation = fisherInf) } else{ out <- list(x = as.vector(x), init = as.vector(pvector), useq = useq, nllhuseq = nllhu, optim = fit.out, nllh = nllhu[bestfit], mle=mle) } out } # Need this for rgammagpd qdiscgammagpd <- function(p, shape, rate, u, sigma, xi, phiu=NULL, shift = 0){ if(!is(p, "numeric")){ stop("p must be numeric.") } if(!is(shift, "numeric")){ stop("shift must be numeric.") } if(shift != round(shift)){ stop("shift must be an integer.") } if(any(c(shape, rate, sigma) <= 0)){ stop("shape, rate, and sigma must all be positive.") } if(!is.null(phiu)){ if(phiu < 0 | phiu > 1){ stop("phiu must be in [0,1].") } } if(p < 0 | p > 1){ stop("p must be in [0,1].") } if(is.null(phiu)){ phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate, thresh=Inf, phiu = 0, shift=shift) } phib <- (1-phiu) nb <- sum(p < phib) nu <- sum(p >= phib) q <- p if(nb > 0){ q[p < phib] <- qdiscgamma(p[which(p < phib)], shape=shape, rate=rate, thresh=u, phiu=phiu, shift=shift) } if(nu > 0){ q[p >= phib] <- qdiscgpd((p[p >= phib]-phib), thresh=u, sigma=sigma, xi=xi, phiu=phiu) } q } # Generate data from the model!! rdiscgammagpd <- function(n, shape, rate, u, sigma, xi, phiu=NULL, shift = 0){ if(!is(n, "numeric")){ stop("n must be numeric.") } if(!is(shift, "numeric")){ stop("shift must be numeric.") } if(any(n != floor(n))){ stop("n must be an integer") } if(shift != round(shift)){ stop("shift must be an integer.") } if(any(c(shape, rate, sigma) <= 0)){ stop("shape, rate, and sigma must all be positive.") } if(!is.null(phiu)){ if(phiu < 0 | phiu > 1){ stop("phiu must be in [0,1].") } } if(is.null(phiu)){ phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate, thresh=Inf, phiu = 0, shift=shift) } p <- runif(n) qdiscgammagpd(p, shape=shape, rate=rate, u=u, sigma=sigma, xi=xi, phiu=phiu, shift=shift) } ddiscgammagpd <- function(x, shape, rate, u, sigma, xi, phiu=NULL, shift = 0, log = FALSE){ if(!is(x, "numeric")){ stop("x must be numeric.") } if(!is(shift, "numeric")){ stop("shift must be numeric.") } if(any(x != floor(x))){ stop("x must be an integer") } if(shift != round(shift)){ stop("shift must be an integer.") } if(any(c(shape, rate, sigma) <= 0)){ stop("shape, rate, and sigma must all be positive.") } if(!is.null(phiu)){ if(phiu < 0 | phiu > 1){ stop("phiu must be in [0,1].") } } if(is.null(phiu)){ phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate, thresh=Inf, phiu = 0, shift=shift) } out <- rep(NA, length(x)) if(sum(x>=u) != 0){ out[x>=u] <- ddiscgpd(x[x>=u], u, sigma, xi, phiu, log=log) } if(sum(x<u) != 0){ out[x<u] <- ddiscgamma(x[x<u], shape, rate, u, phiu, shift, log=log) } out } pdiscgammagpd <- function(q, shape, rate, u, sigma, xi, phiu=NULL, shift=0){ if(!is(x, "numeric")){ stop("x must be numeric.") } if(!is(shift, "numeric")){ stop("shift must be numeric.") } if(any(x != floor(x))){ stop("x must be an integer") } if(shift != round(shift)){ stop("shift must be an integer.") } if(any(c(shape, rate, sigma) <= 0)){ stop("shape, rate, and sigma must all be positive.") } if(!is.null(phiu)){ if(phiu < 0 | phiu > 1){ stop("phiu must be in [0,1].") } } probs <- sapply(q, function(q) ddiscgammagpd(0:q, shape, rate, u, sigma, xi, phiu, shift) %>% sum) probs } ##-------------------------------------------------------------------- ## Borrow functionality from the tcR package to help folks read files ##-------------------------------------------------------------------- parseFile <- function(file, format = c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq', 'mixcr', 'imseq'), inframe = TRUE){ if(!(is.character(file))){ stop("file must be a character string.") } if(!(format %in% c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq', 'mixcr', 'imseq'))){ stop("invalid format specified.") } if(!(is.logical(inframe))){ stop("inframe must be TRUE or FALSE.") } data <- tcR::parse.file(.filename = file, .format = format) if(inframe){ data <- get.inframes(data) } counts <- data$Read.count counts } parseFolder <- function(folder, format = c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq', 'mixcr', 'imseq'), inframe = TRUE){ if(!(is.character(folder))){ stop("folder must be a character string.") } if(!(format %in% c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq', 'mixcr', 'imseq'))){ stop("invalid format specified.") } if(!(is.logical(inframe))){ stop("inframe must be TRUE or FALSE.") } dataList <- parse.folder(.folderpath = folder, .format = format) if(inframe){ dataList <- map(dataList, get.inframes) } counts <- map(dataList, "Read.count") counts } # This function does model fitting and threshold selection as described in Desponds et al (2016) # Details on page 5 of supplement (right column) # x is a vector of clone sizes fdesponds <- function(x){ if(!is(x, "numeric")){ stop("x must be numeric.") } if(any(x != round(x))){ stop("all elements in x must be integers.") } x <- sort(x) Cmins <- unique(x) alpha <- rep(NA, length(Cmins)) KS <- rep(NA, length(Cmins)) n <- rep(NA, length(Cmins)) for(i in 1:length(Cmins)){ d <- x[x > Cmins[i]] n[i] <- length(d) alpha[i] <- n[i]*(sum(log(d/Cmins[i])))^(-1) + 1 empir.cdf <- rep(NA, length(d)) for(j in 1:length(d)){ empir.cdf[j] <- mean(d <= d[j]) } est.cdf <- VGAM::ppareto(d, scale=Cmins[i], shape = alpha[i]-1) KS[i] <- max(abs(est.cdf - empir.cdf)) } min.KS <- min(KS, na.rm = TRUE) Cmin <- Cmins[KS == min(KS, na.rm = TRUE)][1] powerlaw.exponent <- alpha[KS == min(KS, na.rm = TRUE)] powerlaw.exponent <- powerlaw.exponent[!is.na(powerlaw.exponent)] out <- c(min.KS, Cmin, powerlaw.exponent, powerlaw.exponent-1) names(out) <- c("min.KS", "Cmin", "powerlaw.exponent", "pareto.alpha") out } data("repertoires") repertoires data("repertoires") repertoires