R/oncoPrint.R
451e6d73
 
 # == title
 # Make oncoPrint
 #
 # == param
 # -mat a character matrix which encodes mulitple alterations or a list of matrix for which every matrix contains binary
1a56796e
 #      value representing the alteration is present or absent. When it is a list, the names of the list represent alteration types.
5705ac14
 #      You can use `unify_mat_list` to make all matrix having same row names and column names.
451e6d73
 # -get_type If different alterations are encoded in the matrix, this self-defined function
 #           determines how to extract them. Only work when ``mat`` is a matrix.
7d5a7fc0
 # -alter_fun a single function or a list of functions which define how to add graphics for different alterations.
 #                 If it is a list, the names of the list should cover all alteration types.
1a56796e
 # -alter_fun_is_vectorized Whether ``alter_fun`` is implemented vectorized. Internally the function will guess.
451e6d73
 # -col a vector of color for which names correspond to alteration types.
1a56796e
 # -top_annotation Annotation put on top of the oncoPrint. By default it is barplot which shows the number of genes having the alteration in each sample.
 # -right_annotation Annotation put on the right of hte oncoPrint. By default it is barplto which shows the number of samples having the alteration in each gene.
6d8c4ef4
 # -show_pct whether show percent values on the left of the oncoprint
451e6d73
 # -pct_gp graphic paramters for percent row annotation
7d5a7fc0
 # -pct_digits digits for percent values
ad35494a
 # -pct_side side of pct
1a56796e
 # -show_row_names Whether show row names?
 # -row_names_side side of the row names
 # -row_names_gp Graphic parameters of row names.
80cba0ee
 # -row_order row order
 # -column_order column order
bb5445cb
 # -remove_empty_columns if there is no alteration in that sample, whether remove it on the heatmap
ad35494a
 # -remove_empty_rows if there is no alteration in that sample, whether remove it on the heatmap
1a56796e
 # -show_column_names Whether show column names?
5705ac14
 # -heatmap_legend_param pass to `Heatmap`
bb8e309c
 # -... pass to `Heatmap`, so can set ``bottom_annotation`` here.
451e6d73
 #
 # == details
dfa0170a
 # The 'memo sort' method is from https://gist.github.com/armish/564a65ab874a770e2c26 . Thanks to
 # B. Arman Aksoy for contributing the code.
 #
451e6d73
 # For more explanation, please go to the vignette.
 #
 # == value
1a56796e
 # A `Heatmap-class` object which means you can add other heatmaps or annotations to it.
451e6d73
 #
 # == author
 # Zuguang Gu <z.gu@dkfz.de>
 #
ad35494a
 oncoPrint = function(mat, 
 	get_type = function(x) x,
 	alter_fun, 
6f11a271
 	alter_fun_is_vectorized = NULL,
ad35494a
 	col, 
 
7db3856b
 	top_annotation = HeatmapAnnotation(column_barplot = anno_oncoprint_barplot()),
ad35494a
 	right_annotation = rowAnnotation(row_barplot = anno_oncoprint_barplot(
7db3856b
 			axis_param = list(side = "top", labels_rot = 0))),
 	bottom_annotation = NULL,
ad35494a
 
 	show_pct = TRUE, 
 	pct_gp = gpar(fontsize = 10), 
 	pct_digits = 0,
 	pct_side = "left",
 	show_row_names = TRUE,
 	row_names_side = "right",
 	row_names_gp = pct_gp,
80cba0ee
 
 	row_order = NULL,
 	column_order = NULL,
ad35494a
 	
5705ac14
 	remove_empty_columns = FALSE,
ad35494a
 	remove_empty_rows = FALSE,
 	show_column_names = FALSE,
5705ac14
 	heatmap_legend_param = list(title = "Alterations"),
451e6d73
 	...) {
bb8e309c
 
ad35494a
 	arg_list = list(...)
 	arg_names = names(arg_list)
7d5a7fc0
 
7db3856b
 	oe = environment(anno_oncoprint_barplot)
 	environment(anno_oncoprint_barplot) = environment()
 	on.exit(environment(anno_oncoprint_barplot) <- oe)
 
451e6d73
 	# convert mat to mat_list
6d8c4ef4
 	if(inherits(mat, "data.frame")) {
 		mat = as.matrix(mat)
 	}
451e6d73
 	if(inherits(mat, "matrix")) {
 		all_type = unique(unlist(lapply(mat, get_type)))
 		all_type = all_type[!is.na(all_type)]
 		all_type = all_type[grepl("\\S", all_type)]
 
 		mat_list = lapply(all_type, function(type) {
 			m = sapply(mat, function(x) type %in% get_type(x))
 			dim(m) = dim(mat)
 			dimnames(m) = dimnames(mat)
 			m
 		})
6d8c4ef4
 		names(mat_list) = all_type
451e6d73
 	} else if(inherits(mat, "list")) {
 		mat_list = mat
7b914edf
 
451e6d73
 		all_type = names(mat_list)
 		mat_list = lapply(mat_list, function(x) {
7b914edf
 				if(!is.matrix(x)) {
 					stop("Expect a list of matrix (not data frames).")
 				}
451e6d73
 				oattr = attributes(x)
 				x = as.logical(x)
 				attributes(x) = oattr
 				x
 			})
 
 		if(length(unique(sapply(mat_list, nrow))) > 1) {
 			stop("All matrix in 'mat_list' should have same number of rows.")
 		}
 
 		if(length(unique(sapply(mat_list, ncol))) > 1) {
 			stop("All matrix in 'mat_list' should have same number of columns.")
 		}
 	} else {
 		stop("Incorrect type of 'mat'")
 	}
 
ad35494a
 	cat("All mutation types:", paste(all_type, collapse = ", "), "\n")
 
6f11a271
 
 	# type as the third dimension
 	arr = array(FALSE, dim = c(dim(mat_list[[1]]), length(all_type)), dimnames = c(dimnames(mat_list[[1]]), list(all_type)))
 	for(i in seq_along(all_type)) {
 		arr[, , i] = mat_list[[i]]
 	}
 
 	oncoprint_row_order = function() {
 		order(rowSums(count_matrix), n_mut, decreasing = TRUE)
 	}
 
 	oncoprint_column_order = function() {
 		scoreCol = function(x) {
 			score = 0
 			for(i in 1:length(x)) {
 				if(x[i]) {
 					score = score + 2^(length(x)-i*1/x[i])
 				}
7d5a7fc0
 			}
6f11a271
 			return(score)
 		}
 		scores = apply(count_matrix[row_order, ,drop = FALSE], 2, scoreCol)
 		order(scores, decreasing=TRUE)
 	}
 
 	if(missing(alter_fun)) {
 		if(length(mat_list) == 1) {
 			af = list(
 				background = function(x, y, w, h, j, i) {
 					grid.rect(x, y, w, h, gp = gpar(fill = "#CCCCCC", col = NA))
 				},
 				function(x, y, w, h, j, i) {
 					grid.rect(x, y, w*0.9, h*0.9, gp = gpar(fill = "red", col = NA))
 				}
 			)
 			alter_fun_is_vectorized = TRUE
 			names(af) = c("background", names(mat_list))
5705ac14
 			col = "red"
 		} else if(length(mat_list) == 2) {
6f11a271
 			af = list(
 				background = function(x, y, w, h, j, i) {
 					grid.rect(x, y, w, h, gp = gpar(fill = "#CCCCCC", col = NA))
 				},
 				function(x, y, w, h, j, i) {
 					grid.rect(x, y, w*0.9, h*0.9, gp = gpar(fill = "red", col = NA))
 				},
 				function(x, y, w, h, j, i) {
 					grid.rect(x, y, w*0.9, h*0.4, gp = gpar(fill = "blue", col = NA))
 				}
 			)
 			alter_fun_is_vectorized = TRUE
 			names(af) = c("background", names(mat_list))
 			col = c("red", "blue")
7d5a7fc0
 		} else {
 			stop("`alter_fun` should be specified.")
5705ac14
 		}
 		names(col) = names(mat_list)
6f11a271
 		warning("Using default `alter_fun` graphics and reset `col`.")
 	}
 
 	if(is.list(alter_fun)) {
7d5a7fc0
 
 		# validate the list first
 		if(is.null(alter_fun$background)) alter_fun$background = function(x, y, w, h) grid.rect(x, y, w, h, gp = gpar(fill = "#CCCCCC", col = NA))
 		sdf = setdiff(all_type, names(alter_fun))
 		if(length(sdf) > 0) {
6f11a271
 			stop(paste0("You should define graphic function for: ", paste(sdf, collapse = ", ")))
7d5a7fc0
 		}
 
792d265a
 		alter_fun = alter_fun[unique(c("background", intersect(names(alter_fun), all_type)))]
 
6f11a271
 		if(is.null(alter_fun_is_vectorized)) {
 			alter_fun_is_vectorized = guess_alter_fun_is_vectorized(alter_fun)
 		}
792d265a
 
6f11a271
 		if(alter_fun_is_vectorized) {
 			layer_fun = function(j, i, x, y, w, h, fill) {
 				alter_fun$background(x, y, w, h)
 				for(nm in all_type) {
 					m = arr[, , nm]
 					l = pindex(m, i, j)
 					if(sum(l)) {
 						alter_fun[[nm]](x[l], y[l], w[l], h[l])
 					}
 				}
 			}
 			cell_fun = NULL
 		} else {
 			layer_fun = NULL
 			cell_fun = function(j, i, x, y, w, h, fill) {
 				alter_fun$background(x, y, w, h)
 				for(nm in all_type) {
 					if(arr[i, j, nm]) {
 						alter_fun[[nm]](x, y, w, h)
ddf13e44
 					}
792d265a
 				}
7d5a7fc0
 			}
 		}
6f11a271
 	} else if(is.function(alter_fun)) {
 		
 		if(length(formals(alter_fun)) == 5) {
 			af = function(x, y, w, h, v, j, i) alter_fun(x, y, w, h, v)
 		} else {
 			af = alter_fun
 		}
 
 		if(is.null(alter_fun_is_vectorized)) {
 			alter_fun_is_vectorized = FALSE
 		}
 
 		if(alter_fun_is_vectorized) {
 			layer_fun = function(j, i, x, y, w, h, fill) {
 				v = pindex(arr, i, j)
 				af(x, y, w, h, v, j, i)
ddf13e44
 			}
6f11a271
 			cell_fun = NULL
ddf13e44
 		} else {
6f11a271
 			layer_fun = NULL
 			cell_fun = function(j, i, x, y, w, h, fill) {
 				v = arr[i, j, ]
 				af(x, y, w, h, v, j, i)
ddf13e44
 			}
 		}
6f11a271
 	} else {
 		stop("You need to set `alter_fun`.")
5705ac14
 	}
 
792d265a
 	col = col[intersect(names(col), all_type)]
 
dfa0170a
 	count_matrix = apply(arr, c(1, 2), sum)
c785a658
 	n_mut = rowSums(apply(arr, 1:2, any))
ad35494a
 
7db3856b
 	if(is.null(row_order)) {
ad35494a
 		row_order = oncoprint_row_order()
 	}
7db3856b
 	if(is.null(column_order)) {
ad35494a
 		column_order = oncoprint_column_order()
 	}
7db3856b
 
dfa0170a
 	if(is.null(row_order)) row_order = seq_len(nrow(count_matrix))
ad804638
 	if(is.null(column_order)) column_order = seq_len(ncol(count_matrix))
dfa0170a
 	if(is.character(column_order)) {
 		column_order = structure(seq_len(dim(arr)[2]), names = dimnames(arr)[[2]])[column_order]
 	}
bb970cfc
 	names(column_order) = as.character(column_order)
bb5445cb
 	if(remove_empty_columns) {
 		l = rowSums(apply(arr, c(2, 3), sum)) > 0
 		arr = arr[, l, , drop = FALSE]
bb970cfc
 		column_order = structure(seq_len(sum(l)), names = which(l))[as.character(intersect(column_order, which(l)))]
bb5445cb
 	}
ad35494a
 	if(remove_empty_rows) {
 		l = rowSums(apply(arr, c(1, 3), sum)) > 0
 		arr = arr[l, , , drop = FALSE]
 		row_order = structure(seq_len(sum(l)), names = which(l))[as.character(intersect(row_order, which(l)))]
 	}
451e6d73
 
 	# validate col
 	sdf = setdiff(all_type, names(col))
 	if(length(sdf) > 0) {
 		stop(paste0("You should define colors for:", paste(sdf, collapse = ", ")))
 	}
 
 	# for each gene, percent of samples that have alterations
1422ff8d
 	pct_num = rowSums(apply(arr, 1:2, any)) / ncol(mat_list[[1]])
 	pct = paste0(round(pct_num * 100, digits = pct_digits), "%")
451e6d73
 
ad35494a
 	### now the annotations
7db3856b
 	err = try(top_annotation <- eval(substitute(top_annotation)), silent = TRUE)
 	if(inherits(err, "try-error")) {
 		stop_wrap("find an error when executing top_annotation. ")
 	}
 	right_annotation = eval(substitute(right_annotation))
 
ad35494a
 	if("left_annotation" %in% arg_names) {
 		stop("'left_annotation' are not allowed to specify, you can add...")
451e6d73
 	}
ad35494a
 	left_annotation = NULL
 	if(show_pct) {
 		left_annotation = rowAnnotation(pct = anno_text(pct, just = "right", location = unit(1, "npc"), gp = pct_gp),
 			show_annotation_name = FALSE)
 	}
 	if(show_row_names) {
 		ha_row_names = rowAnnotation(rownames = anno_text(dimnames(arr)[[1]], gp = pct_gp, just = "left", location = unit(0, "npc")),
 			show_annotation_name = FALSE)
7db3856b
 		right_annotation = c(ha_row_names, right_annotation, gap = unit(2, "mm"))
451e6d73
 	}
 
 	#####################################################################
 	# the main matrix
 	pheudo = c(all_type, rep(NA, nrow(arr)*ncol(arr) - length(all_type)))
57cc7781
 	dim(pheudo) = dim(arr)[1:2]
 	dimnames(pheudo) = dimnames(arr)[1:2]
dfa0170a
 	
ad35494a
 	if(length(arg_list)) {
 		if(any(arg_names %in% c("rect_gp", "cluster_rows", "cluster_columns", "cell_fun"))) {
7d5a7fc0
 			stop("'rect_gp', 'cluster_rows', 'cluster_columns', 'cell_fun' are not allowed to use in `oncoPrint()`.")
 		}
 	}
 
ad35494a
 	ht = Heatmap(pheudo, col = col, 
 		rect_gp = gpar(type = "none"), 
 		cluster_rows = FALSE, cluster_columns = FALSE, 
 		row_order = row_order, column_order = column_order,
6f11a271
 		cell_fun = cell_fun, layer_fun = layer_fun,
7b914edf
 		top_annotation = top_annotation,
ad35494a
 		left_annotation = left_annotation,
 		right_annotation = right_annotation,
 		show_row_names = FALSE,
0623db7b
 		show_column_names = show_column_names,
ad35494a
 		heatmap_legend_param = heatmap_legend_param,
 		...
 	)
451e6d73
 
ad35494a
 	return(ht)
455d2853
 }
5705ac14
 
 # == title
 # Unify a list of matrix 
 #
 # == param
 # -mat_list a list of matrix, all of them should have dimension names
 # -default default values for the newly added rows and columns
 #
 # == details
 # All matrix will be unified to have same row names and column names
 #
7b914edf
 # == value
 # A list of matrix
 #
5705ac14
 # == author
 # Zuguang Gu <z.gu@dkfz.de>
 #
 unify_mat_list = function(mat_list, default = 0) {
 	common_rn = unique(unlist(lapply(mat_list, rownames)))
 	common_cn = unique(unlist(lapply(mat_list, colnames)))
 
 	mat_list2 = lapply(seq_along(mat_list), function(i) {
 		mat = matrix(default, nrow = length(common_rn), ncol = length(common_cn))
 		dimnames(mat) = list(common_rn, common_cn)
 		mat[rownames(mat_list[[i]]), colnames(mat_list[[i]])] = mat_list[[i]]
 		mat
 	})
 	names(mat_list2) = names(mat_list)
 	return(mat_list2)
 }
 
 
0286e386
 
 # == title
ad35494a
 # Barplot annotation for oncoPrint
0286e386
 #
ad35494a
 # == param
1a56796e
 # -type A vector of the alteration types in your data. It can be a subset of all alteration types if you don't want to show them all.
 # -which Is ti a row annotation or a column annotation?
 # -width Wisth of the annotation.
 # -height Height of the annotation.
 # -border Whether draw the border?
 # -... Other parameters passed to `anno_barplot`.
0286e386
 #
1a56796e
 # == detail
 # This annotation function should always use with `oncoPrint`.
 # 
0286e386
 # == author
 # Zuguang Gu <z.gu@dkfz.de>
 #
ad35494a
 anno_oncoprint_barplot = function(type = all_type, which = c("column", "row"),
 	width = NULL, height = NULL, border = FALSE, ...) {
 
 	if(is.null(.ENV$current_annotation_which)) {
 		which = match.arg(which)[1]
 	} else {
 		which = .ENV$current_annotation_which
 	}
 
 	anno_size = anno_width_and_height(which, width, height, unit(2, "cm"))
 	# get variables fron oncoPrint() function
7db3856b
 	pf = parent.env(environment())
 	arr = pf$arr
 	all_type = pf$all_type
 	col = pf$col
ad35494a
 
 	type = type
 	all_type = intersect(all_type, type)
7db3856b
 	if(length(all_type) == 0) {
 		stop_wrap("find no overlap, check your `type` argument.")
 	}
ad35494a
 	arr = arr[, , all_type, drop = FALSE]
 	col = col[all_type]
 
 	if(which == "column") {
 		count = apply(arr, c(2, 3), sum)
d9834c56
 		fun = anno_barplot(count, gp = gpar(fill = col, col = NA), which = "column",
ad35494a
 			baseline = 0, height = anno_size$height, border = border, ...)
 	} else {
 		count = apply(arr, c(1, 3), sum)
d9834c56
 		fun = anno_barplot(count, gp = gpar(fill = col, col = NA), which = "row",
ad35494a
 			baseline = 0, width = anno_size$width, border = border, ...)
0286e386
 	}
d9834c56
 	
 	fun@show_name = FALSE
 	return(fun)
0286e386
 }
 
7db3856b
 
6f11a271
 guess_alter_fun_is_vectorized = function(alter_fun) {
 	n = 50
 	if(is.list(alter_fun)) {
 		x = 1:n
 		y = 1:n
 		w = unit(1:n, "mm")
 		h = unit(1:n, "mm")
 		dev.null()
 		oe = try({
 			for(i in seq_along(alter_fun)) {
 				alter_fun[[i]](x, y, w, h)
 			}
 		}, silent = TRUE)
 		dev.off2()
 		if(inherits(oe, "try-error")) {
 			return(FALSE)
 		} else {
 			return(TRUE)
 		}
 	} else {
 		return(FALSE)
 	}
 }
0286e386