xaxt = "n", yaxt = "n", bty = "n")
draw.circle(0,0,1,col = wes_palette("Zissou")[2])
arrows(0,0,.95,0,col = wes_palette("Zissou")[3], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Zissou")[4], lwd = 3)
wes_palettes
draw.circle(0,0,1,col = wes_palette("Royal1")[2])
draw.circle(0,0,1,col = wes_palette("Royal1")[1])
draw.circle(0,0,1,col = wes_palette("Royal1")[3])
draw.circle(0,0,1,col = wes_palette("Royal1")[4])
draw.circle(0,0,1,col = wes_palette("Royal1")[5])
draw.circle(0,0,1,col = wes_palette("Royal1")[6])
draw.circle(0,0,1,col = wes_palette("Royal2")[1])
arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Roayl2")[5], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Roayl2")[4], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Roayl2")[2], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal2")[2], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal2")[4], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal2")[5], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal2")[6], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal2")[2], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal1")[2], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal1")[1], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3)
draw.circle(0,0,1,col = wes_palette("Royal2")[1], border = NULL)
arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3)
?draw.circle
draw.circle(0,0,1,col = wes_palette("Royal2")[1], border = wes_palette("Royal2")[1])
arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3)
draw.circle(0,0,1,col = wes_palette("Royal2")[1])
arrows(0,0,.95,0,col = wes_palette("Royal2")[3], lwd = 3)
arrows(0,0,0,1.05,col = wes_palette("Royal1")[3], lwd = 3)
X <- matrix(c(1,.5,0,1), byrow = T, nrow = 2)
X
X <- matrix(c(1,.5,0,1), byrow = T, nrow = 2)
X
?draw.ellipse
draw.ellipse(X)
draw.ellipse(X[,1], X[,2])
draw.ellipse(x=0,y=0, a = X[,1], b = X[,2])
plot(0,0,col=NA, xlab = "", ylab = "",
xlim = c(-1.5,1.5), ylim = c(-1.5,1.5),
xaxt = "n", yaxt = "n", bty = "n")
draw.ellipse(x=0,y=0, a = X[,1], b = X[,2])
t(X) %*% X
draw.ellipse(x=1,y=1.25, a = sqrt(.5), b = sqrt(.5))
plot(c(0,10), c(0,10), type="n", main="test draw.ellipse")
draw.ellipse(c(3,7), c(8,8), c(0.5,1), c(1,0.5), col=c(2,4),
angle=c(45,0), segment=rbind(c(0,45),c(45,360)))
plot(c(0,10), c(0,10), type="n", main="test draw.ellipse")
draw.ellipse(c(3,7), c(8,8), c(0.5,1), c(1,0.5), col=c(2,4),
angle=c(45,0), segment=rbind(c(0,45),c(45,360)))
?svd
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X <- hilbert(9)[, 1:6]
X
image(X)
(s <- svd(X))
D <- diag(s$d)
s$u %*% D %*% t(s$v) #  X = U D V'
t(s$u) %*% X %*% s$v #  D = U' X V
dim(X)
str(s)
dim(X)
(s <- svd(X, nu=3, nv = 3))
D <- diag(s$d)
s$u %*% D %*% t(s$v) #  X = U D V'
str(s)
u %*% t(v)
s$u %*% t(s$v)
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
hilbert(2)
outer(0:1,1:2,"+")
1/outer(0:1,1:2,"+")
library(tcR)
.05*2/.005
.05*.005
.05*.005/2
.5*2/.005
.5*.005/2
.55*.005/2
.5*.004
.5*.004
.5*.005
.55*.005
1.05*.005
.00275-.0025
install.packages("ineq")
549.17/5
549.17/5*4
549.17-109.834
(549.17-109.834)/4
440.26/2
164.68+263.44
(164.68+263.44)/2
109.84+200+214.06
109.84+200+214.06-70
source("https://bioconductor.org/biocLite.R")
biocLite("powerTCR")
setwd("~/Box Sync/School/research - Qunhua/Project_1/tcr_package/powerTCR")
##----------------------------------------------------------------------------------
## Density, distribution, and quantile functions, random number generation
## for discrete truncated gamma and discrete gpd
##----------------------------------------------------------------------------------
ddiscgamma <- function(x, shape, rate, thresh, phiu, shift = 0, log = FALSE){
if(any(x != floor(x))){
stop("x must be an integer")
}
out <- rep(0, length(x))
up <- pgamma(x+1-shift, shape=shape, rate=rate)
down <- pgamma(x-shift, shape=shape, rate=rate)
if(!log){
b <- pgamma(thresh-shift, shape=shape, rate=rate)
out[x < thresh] <- ((1-phiu)*(up-down)/b)[x < thresh]
} else{
b <- pgamma(thresh-shift, shape=shape, rate=rate, log = TRUE)
out[x < thresh] <- (log(1-phiu)+log(up-down) - b)[x < thresh]
}
out
}
pdiscgamma <- function(q, shape, rate, thresh, phiu, shift = 0){
probs <- ddiscgamma(0:q, shape, rate, thresh, phiu, shift)
sum(probs)
}
qdiscgamma <- function(p, shape, rate, thresh, phiu, shift = 0){
qtrunc(p/(1-phiu), spec = "gamma", a=0,
b=thresh-shift, shape=shape,rate=rate) %>% floor + shift
}
rdiscgamma <- function(n, shape, rate, thresh, shift = 0){
rtrunc(n, spec = "gamma", a=0,
b=thresh-shift, shape=shape, rate=rate) %>% floor + shift
}
ddiscgpd <- function(x, thresh, sigma, xi, phiu, log = FALSE){
up <- evmix::pgpd(x+1, u=thresh, sigmau=sigma, xi=xi)
down <- evmix::pgpd(x, u=thresh, sigmau=sigma, xi=xi)
if(!log){
phiu*(up-down)
} else{
log(phiu) + log(up-down)
}
}
pdiscgpd <- function(q, thresh, sigma, xi, phiu){
probs <- ddiscgpd(thresh:q, thresh, sigma, xi, phiu)
sum(probs)
}
qdiscgpd <- function(p, thresh, sigma, xi, phiu){
evmix::qgpd(p/phiu, u=thresh, sigmau = sigma, xi=xi) %>% floor
}
rdiscgpd <- function(n, thresh, sigma, xi){
evmix::rgpd(n, u=thresh, sigmau=sigma, xi=xi) %>% floor
}
##----------------------------------------------------------------------------------
## negative log likelihood and parameter estimation functions
## for discrete truncated gamma and discrete gpd
##----------------------------------------------------------------------------------
discgammanll <- function(param, dat, thresh, phiu, shift=0){
shape <- exp(param[1])
rate <- exp(param[2])
if(any(dat > thresh-1)){ warning("data must be less than the threshold") }
ll <- log(ddiscgamma(dat, shape, rate, thresh, phiu, shift))
sum(-ll)
}
discgpdnll <- function(param, dat, thresh, phiu){
sigma <- exp(param[1])
xi <- param[2]
ll <- log(ddiscgpd(dat, thresh, sigma, xi, phiu))
sum(-ll)
}
fdiscgamma <- function(param, dat, thresh, phiu, shift = 0, method, ...){
opt <- optim(log(param), discgammanll, dat=dat, thresh=thresh,
phiu=phiu, shift=shift, method=method, hessian = TRUE, ...)
opt
}
fdiscgpd <- function(param, dat, thresh, phiu, method, ...){
opt <- optim(c(log(param[1]),param[2]), discgpdnll, dat=dat, thresh=thresh,
phiu=phiu, method=method, hessian = TRUE, ...)
opt
}
##-----------------------------------------------------------------------------------------------------
## Fitting function, random number generation, quantile function, density, and distribution functions
##-----------------------------------------------------------------------------------------------------
fdiscgammagpd <- function(x, useq, shift = NULL, pvector=NULL,
std.err = TRUE, method = "Nelder-Mead", ...){
if(!is(x, "numeric")){
stop("x must be numeric.")
}
if(!is.null(shift)){
if(!is(shift, "numeric")){
stop("shift must be numeric.")
}
if(shift != round(shift)){
stop("shift must be an integer.")
}
}
if(!is(useq, "numeric")){
stop("useq must be numeric.")
}
if(any(x != round(x))){
stop("all elements in x must be integers.")
}
if(any(useq != round(useq))){
stop("all elements in useq must be integers.")
}
if(!is.null(pvector) & !(length(pvector) == 5)){
stop("pvector must contain 5 elements.")
}
if(!(is.logical(std.err))){
stop("std.err must be TRUE or FALSE.")
}
if(!(method %in% c("Nelder-Mead","BFGS",
"CG", "L-BFGS-B", "SANN", "Brent"))){
stop("invalid method supplied.")
}
if(is.null(shift)){
shift <- min(x)
}
if (is.null(pvector)) {
pvector <- rep(NA,5)
s <- log(mean(x+0.5))-mean(log(x+0.5))
k <- (3-s + sqrt((s-3)^2 + 24*s))/12/s
pvector[1] <- k
pvector[2] <- k/mean(x)
pvector[3] <- as.vector(quantile(x, 0.9))
xu <- x[x>=pvector[3]]
initfgpd <- evmix::fgpd(xu, min(xu)-10^(-5))
pvector[4] <- initfgpd$mle[1]
pvector[5] <- initfgpd$mle[2]
}
bulk <- lapply(1:length(useq),
function(idx,x,useq) x < useq[idx], x=x, useq=useq)
tail <- lapply(1:length(useq),
function(idx,x,useq) x >= useq[idx], x=x, useq=useq)
phiu <- lapply(1:length(useq),
function(idx,tail) mean(tail[[idx]]), tail=tail)
gammfit <- list()
gpdfit <- list()
nllhu <- rep(NA, length(useq))
for(i in 1:length(useq)){
gammfit[[i]] <- tryCatch(expr = fdiscgamma(pvector[1:2],x[bulk[[i]]],
useq[i],
phiu[[i]],
shift,
method=method),
error = function(err)
message("gamma part could not be fit at a
specified threshold.")
NA)
gpdfit[[i]] <- tryCatch(expr = fdiscgpd(pvector[4:5],
x[tail[[i]]],
useq[i],
phiu[[i]],
method=method),
error = function(err) {
pvec3 <- as.vector(quantile(x,1-phiu[[i]]))
xu <- x[x>=pvec3]
initfgpd.adj <-
evmix::fgpd(x, min(xu)-10^(-5))
pvec4 <- initfgpd.adj$mle[1]
pvec5 <- initfgpd.adj$mle[2]
tryCatch(expr = fdiscgpd(c(pvec4,pvec5),
x[tail[[i]]],
useq[i],
phiu[[i]],
method=method),
error = function(err2)
message("GPD part could not be fit
at a specified
threshold.")
NA)
})
nllhu[i] <- tryCatch(expr = gammfit[[i]]$value + gpdfit[[i]]$value,
error = function(err) NA)
}
bestfit <- which.min(nllhu)
fit.out <- list(gammfit[[bestfit]], gpdfit[[bestfit]])
names(fit.out) <- c("bulk","tail")
mle <- c(mean(x >= useq[bestfit]),
exp(fit.out$bulk$par),
useq[bestfit],
exp(fit.out$tail$par[1]),
fit.out$tail$par[2])
names(mle) <- c("phi","shape","rate","thresh","sigma","xi")
if(std.err){
H <- fit.out$bulk$hessian %>% rbind(matrix(rep(0,4),nrow = 2)) %>%
cbind(rbind(matrix(rep(0,4),nrow = 2),fit.out$tail$hessian))
fisherInf <- tryCatch(expr = solve(H), error = function(err) NA)
out <- list(x = as.vector(x), init = as.vector(pvector),
useq = useq, nllhuseq = nllhu,
optim = fit.out, nllh = nllhu[bestfit],
mle=mle, fisherInformation = fisherInf)
} else{
out <- list(x = as.vector(x), init = as.vector(pvector),
useq = useq, nllhuseq = nllhu,
optim = fit.out, nllh = nllhu[bestfit],
mle=mle)
}
out
}
# Need this for rgammagpd
qdiscgammagpd <- function(p, shape, rate, u, sigma, xi, phiu=NULL, shift = 0){
if(!is(p, "numeric")){
stop("p must be numeric.")
}
if(!is(shift, "numeric")){
stop("shift must be numeric.")
}
if(shift != round(shift)){
stop("shift must be an integer.")
}
if(any(c(shape, rate, sigma) <= 0)){
stop("shape, rate, and sigma must all be positive.")
}
if(!is.null(phiu)){
if(phiu < 0 | phiu > 1){
stop("phiu must be in [0,1].")
}
}
if(p < 0 | p > 1){
stop("p must be in [0,1].")
}
if(is.null(phiu)){
phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate,
thresh=Inf, phiu = 0, shift=shift)
}
phib <- (1-phiu)
nb <- sum(p < phib)
nu <- sum(p >= phib)
q <- p
if(nb > 0){
q[p < phib] <- qdiscgamma(p[which(p < phib)], shape=shape,
rate=rate, thresh=u, phiu=phiu, shift=shift)
}
if(nu > 0){
q[p >= phib] <- qdiscgpd((p[p >= phib]-phib), thresh=u,
sigma=sigma, xi=xi, phiu=phiu)
}
q
}
# Generate data from the model!!
rdiscgammagpd <- function(n, shape, rate, u, sigma, xi, phiu=NULL, shift = 0){
if(!is(n, "numeric")){
stop("n must be numeric.")
}
if(!is(shift, "numeric")){
stop("shift must be numeric.")
}
if(any(n != floor(n))){
stop("n must be an integer")
}
if(shift != round(shift)){
stop("shift must be an integer.")
}
if(any(c(shape, rate, sigma) <= 0)){
stop("shape, rate, and sigma must all be positive.")
}
if(!is.null(phiu)){
if(phiu < 0 | phiu > 1){
stop("phiu must be in [0,1].")
}
}
if(is.null(phiu)){
phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate,
thresh=Inf, phiu = 0, shift=shift)
}
p <- runif(n)
qdiscgammagpd(p, shape=shape, rate=rate, u=u,
sigma=sigma, xi=xi, phiu=phiu, shift=shift)
}
ddiscgammagpd <- function(x, shape, rate, u, sigma, xi,
phiu=NULL, shift = 0, log = FALSE){
if(!is(x, "numeric")){
stop("x must be numeric.")
}
if(!is(shift, "numeric")){
stop("shift must be numeric.")
}
if(any(x != floor(x))){
stop("x must be an integer")
}
if(shift != round(shift)){
stop("shift must be an integer.")
}
if(any(c(shape, rate, sigma) <= 0)){
stop("shape, rate, and sigma must all be positive.")
}
if(!is.null(phiu)){
if(phiu < 0 | phiu > 1){
stop("phiu must be in [0,1].")
}
}
if(is.null(phiu)){
phiu <- 1-pdiscgamma(u-1, shape=shape, rate=rate,
thresh=Inf, phiu = 0, shift=shift)
}
out <- rep(NA, length(x))
if(sum(x>=u) != 0){
out[x>=u] <- ddiscgpd(x[x>=u], u, sigma, xi, phiu, log=log)
}
if(sum(x<u) != 0){
out[x<u] <- ddiscgamma(x[x<u], shape, rate, u, phiu, shift, log=log)
}
out
}
pdiscgammagpd <- function(q, shape, rate, u, sigma, xi, phiu=NULL, shift=0){
if(!is(x, "numeric")){
stop("x must be numeric.")
}
if(!is(shift, "numeric")){
stop("shift must be numeric.")
}
if(any(x != floor(x))){
stop("x must be an integer")
}
if(shift != round(shift)){
stop("shift must be an integer.")
}
if(any(c(shape, rate, sigma) <= 0)){
stop("shape, rate, and sigma must all be positive.")
}
if(!is.null(phiu)){
if(phiu < 0 | phiu > 1){
stop("phiu must be in [0,1].")
}
}
probs <- sapply(q, function(q)
ddiscgammagpd(0:q, shape, rate, u, sigma, xi, phiu, shift) %>%
sum)
probs
}
##--------------------------------------------------------------------
## Borrow functionality from the tcR package to help folks read files
##--------------------------------------------------------------------
parseFile <- function(file, format = c('mitcr', 'mitcrbc', 'migec', 'vdjtools',
'immunoseq', 'mixcr', 'imseq'),
inframe = TRUE){
if(!(is.character(file))){
stop("file must be a character string.")
}
if(!(format %in%  c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq',
'mixcr', 'imseq'))){
stop("invalid format specified.")
}
if(!(is.logical(inframe))){
stop("inframe must be TRUE or FALSE.")
}
data <- tcR::parse.file(.filename = file, .format = format)
if(inframe){
data <- get.inframes(data)
}
counts <- data$Read.count
counts
}
parseFolder <- function(folder, format = c('mitcr', 'mitcrbc', 'migec',
'vdjtools', 'immunoseq', 'mixcr',
'imseq'),
inframe = TRUE){
if(!(is.character(folder))){
stop("folder must be a character string.")
}
if(!(format %in%  c('mitcr', 'mitcrbc', 'migec', 'vdjtools', 'immunoseq',
'mixcr', 'imseq'))){
stop("invalid format specified.")
}
if(!(is.logical(inframe))){
stop("inframe must be TRUE or FALSE.")
}
dataList <- parse.folder(.folderpath = folder, .format = format)
if(inframe){
dataList <- map(dataList, get.inframes)
}
counts <- map(dataList, "Read.count")
counts
}
# This function does model fitting and threshold selection as described in Desponds et al (2016)
# Details on page 5 of supplement (right column)
# x is a vector of clone sizes
fdesponds <- function(x){
if(!is(x, "numeric")){
stop("x must be numeric.")
}
if(any(x != round(x))){
stop("all elements in x must be integers.")
}
x <- sort(x)
Cmins <- unique(x)
alpha <- rep(NA, length(Cmins))
KS <- rep(NA, length(Cmins))
n <- rep(NA, length(Cmins))
for(i in 1:length(Cmins)){
d <- x[x > Cmins[i]]
n[i] <- length(d)
alpha[i] <- n[i]*(sum(log(d/Cmins[i])))^(-1) + 1
empir.cdf <- rep(NA, length(d))
for(j in 1:length(d)){
empir.cdf[j] <- mean(d <= d[j])
}
est.cdf <- VGAM::ppareto(d, scale=Cmins[i], shape = alpha[i]-1)
KS[i] <- max(abs(est.cdf - empir.cdf))
}
min.KS <- min(KS, na.rm = TRUE)
Cmin <- Cmins[KS == min(KS, na.rm = TRUE)][1]
powerlaw.exponent <- alpha[KS == min(KS, na.rm = TRUE)]
powerlaw.exponent <- powerlaw.exponent[!is.na(powerlaw.exponent)]
out <- c(min.KS, Cmin, powerlaw.exponent, powerlaw.exponent-1)
names(out) <- c("min.KS", "Cmin", "powerlaw.exponent", "pareto.alpha")
out
}
data("repertoires")
repertoires
data("repertoires")
repertoires