Chapter 2 Functions Introduction
In this Chapter, I list some of important functions for plotting. These functions are mainly based on packages PTCA4CATA
and data4PCCAR
, which are developed by Dr. Herve Abdi and his team at The University of Texas at Dallas. These functions will provide computational convenience to my plotting and visualization of my results. It is worth to mention that these functions are designed to generate plots so users should conduct MSA before using these functions. For how to conduct MSA in R, please see each chapter in this book: PCA4, CA5, MCA8, BADA7, DiCA6, PLS-C9, DiSTATIS10 and MFA11. For further information about the two basic packages, please visit Dr. Abdi’s github
2.1 Plotting Scree plot
The first function is designed to plot scree plot:
Required parameters: eigs(eigenvalue), p.vals(inference p values)
Output: Scree plot
2.2 Plotting Permutation test results
The second function is designed to plot the permutation histogram:
Required parameters: eigs.perm(eigenvalue.permutation), eigs(eigenvalue), para1(x_lim for X axis, default = 5), para2(intervals of distribution, default = 20), Dim(number of dimension, default = 1).
Output: Permutation Plots
plot.permutation <- function(eigs.perm, eigs,
para1 = 5, para2 = 20,
Dim = 1){
zeDim = Dim
pH <- prettyHist(
distribution = eigs.perm[,zeDim],
observed = eigs[zeDim],
xlim = c(0,para1), # needs to be set by hand
breaks = para2,
border = "gray",
main = paste0("Permutation Test for Eigenvalue ",zeDim),
xlab = paste0("Eigenvalue ",zeDim),
ylab = "",
counts = FALSE,
cutoffs = c( 0.975))
}
2.3 Plotting Heatmap
Required parameters: data1(data set 1), data2(data set 2), xcol(color of labels in X axis), ycol(color of labels in X axis), textsize(font size of the labels, default = 5)
Output: Heatmap of correlation between two data sets.
plot.heatmap <- function(data1, data2,
DATA=NULL, xcol = NULL,
ycol = NULL, textsize = 5,
scale = TRUE ){
if(is.null(DATA)){
data1 <- as.matrix(data1)
data2 <- as.matrix(data2)
DATA = cor(data1, data2)
}
if(is.null(xcol)){
xcol <- prettyGraphsColorSelection(
n.colors = ncol(DATA))
}
if(is.null(ycol)){
ycol <- prettyGraphsColorSelection(
n.colors = nrow(DATA))
}
hmap <- superheat(DATA,
heat.pal = c("brown", "white","red4"),
left.label.size = 0.5,
bottom.label.size = 0.5,
bottom.label.text.angle = 90,
left.label.text.alignment = "right",
bottom.label.text.alignment = "right",
bottom.label.text.col = xcol,
left.label.text.col = ycol,
grid.vline = FALSE,
grid.hline = FALSE,
bottom.label.col = "white",
left.label.col = "white",
scale = scale,
left.label.text.size = textsize,
bottom.label.text.size = textsize,
)
}
2.4 Plotting Row Factor Scores
Required parameters: DESIGN(the grouping design of observation, such as Lakers is group1 and Celtics is group2), fs(factor scores), eigenvalues, tau(Kendall rank correlation coefficient), d(dimension, default = 1), mode(do you want “hull” or “CI”?), method(The string you want to add on your graph’s title).
Output: Global row factor scores map for each observation
plot.fs <- function(DESIGN, fs, eigs,
tau, d = 1, mode="CI",
method = " ")
{
fs <- as.matrix(fs)
# GETMEANS
fm.tmp <- aggregate(fs, list(DESIGN), mean)
fm <- fm.tmp[,2:ncol(fm.tmp)]
rownames(fm) <- fm.tmp[,1]
tau <- round(tau)
color.dot <- as.matrix(DESIGN)
na.de <- as.matrix(levels(DESIGN))
num.de <- length(na.de)
for (i in 1: num.de){
color.dot[which(color.dot == na.de[i])] <- index[i]
}
color.dot <- as.matrix(color.dot)
rownames(color.dot) <- DESIGN
fs.plot <- createFactorMap(fs, # data
title = paste0("Factor Scores_",
method),
alpha.points = 0.3,
axis1 = d, axis2 = (d+1),
pch = 19,
cex = 2,
text.cex = 2.5,
display.labels = FALSE,
col.points = color.dot,
col.labels = color.dot
)
fs.label <- createxyLabels.gen(d,(d+1),
lambda = eigs,
tau = tau,
axisName = "Component "
)
ind <- c(1:num.de)
for (i in 1:num.de){
inde <- match(rownames(color.dot),
sort(rownames(fm)))
ind[i] <- which(inde == i)[1]
}
grp.ind <- ind
rownames(fm[sort(rownames(fm)),]) <- sub("[[:punct:]]","",
rownames(fm[sort(rownames(fm)),]))
graphs.means <- PTCA4CATA:: createFactorMap(fm[sort(rownames(fm)),],
axis1 = d, axis2 =(d+1),
constraints = fs.plot$constraints,
col.points = color.dot[grp.ind],
col.labels = color.dot[grp.ind],
alpha.points = 0.9)
BootCube <- PTCA4CATA::Boot4Mean(fs,
design = as.factor(DESIGN),
niter = 100)
dimnames(BootCube$BootCube)[[2]] <- paste0("dim ",
1:dim(BootCube$BootCube)[[2]])
boot.elli <- MakeCIEllipses(data =
BootCube$BootCube[,d:(d+1),][sort(rownames(BootCube$BootCube[,d:(d+1),])),,],
names.of.factors =
c(paste0("Dimension ", d),paste0("Dimension ",
(d+1))),
col = color.dot[grp.ind],
alpha.line = 0.3,
alpha.ellipse = 0.3
)
# with Hull
colnames(fs) <- paste0('Dimension ', 1:ncol(fs))
# getting the color correct: an ugly trick
#col.groups <- as.data.frame(col.groups)
DESIGN <- factor(DESIGN, levels = DESIGN[grp.ind])
GraphHull <- PTCA4CATA::MakeToleranceIntervals(fs,
axis1 = d,
axis2 = (d+1),
design = DESIGN,
col = color.dot[grp.ind],
names.of.factors = c(paste0("Dim ", d),
paste0("Dim 2", (d+1))),
p.level = 1.00)
if (mode == "hull"){
factor.map <- fs.plot$zeMap_background +
GraphHull +
fs.plot$zeMap_dots +
fs.label +
graphs.means$zeMap_dots +
graphs.means$zeMap_text}
if (mode == "CI"){
factor.map <- fs.plot$zeMap_background +
boot.elli +
fs.plot$zeMap_dots +
fs.label +
graphs.means$zeMap_dots +
graphs.means$zeMap_text}
factor.map
}
2.5 Plotting Column Plot
Required parameters: fj(column factor scores), eigs(eigenvalue), tau(Kendall rank correlation coefficient), d(dimension, default = 1), col(color coding of variables), method(title names)
Output: column factor scores plot.
plot.cfs <- function(fj, eigs,
tau, d = 1,
col = NULL,
method = " ",
colrow = "row")
{
if(colrow == "row"){
cr <- nrow(fj)
}
if(colrow == "col"){
cr <- ncol(fj)
}
if(is.null(col)){
col <- prettyGraphs::prettyGraphsColorSelection(cr)
}
fj.labels <- createxyLabels.gen(d,(d+1),
lambda = eigs,
tau = round(tau),
axisName = "Component "
)
fj.plot <- createFactorMap(fj, # data
title = paste0("Factor Scores", method),
axis1 = d,
axis2 = (d+1),
pch = 19,
cex = 2,
text.cex = 3,
col.points = col,
col.labels = col
)
column.factor.map <- fj.plot$zeMap_background +
fj.plot$zeMap_dots +
fj.plot$zeMap_text +
fj.labels
column.factor.map
}
2.6 Plotting Loading Plot
Required parameters: data(raw data), col(color coding), fs(factor scores), eigs(eigenvalue), tau(Kendall rank correlation coefficient), d(dimension, default = 1)
Output: Loading plot for each variable.
plot.loading <- function(data, col=NULL, fs, eigs, tau, d=1){
if (is.null(col)){
col <- prettyGraphsColorSelection(n.colors = ncol(data))
}
cor.loading <- cor(data, fs)
rownames(cor.loading) <- rownames(cor.loading)
fi.labels <- createxyLabels.gen(d,(d+1),
lambda = eigs,
tau = round(tau),
axisName = "Component "
)
loading.plot <- createFactorMap(cor.loading,
axis1 = d,
axis2 = (d+1),
constraints = list(minx = -1, miny = -1,
maxx = 1, maxy = 1),
col.points = col,
col.labels = col)
LoadingMapWithCircles <- loading.plot$zeMap_background +
loading.plot$zeMap_dots +
addArrows(cor.loading[,d:(d+1)], color = col) +
addCircleOfCor() +
loading.plot$zeMap_text +
fi.labels
LoadingMapWithCircles
}
2.7 Plotting Contribution Barplot
Required parameters: cj(contribution of each variable), fj(factor scores of each variable), col(color coding), boot.ratios(bootstrap ratio), signiOnly(choose to only display sigificant contribution or not, default = FALSE), fig(how many plots to display, default = 2)
Output: Combination of contribution barplots.
plot.cb <- function(cj, fj, col = NULL,
boot.ratios, signifOnly = FALSE,
fig = 2, horizontal = FALSE,
colrow = "col", fontsize = 3)
{
if (colrow == "col"){
cr <- ncol(cj)
}
if (colrow == "row"){
cr <- nrow(cj)
}
# Contribution Plot
### plot contributions for component 1
if (is.null(col)){
col <- prettyGraphs::prettyGraphsColorSelection(cr)
}
signed.ctrJ <- cj * sign(fj)
laDim = 1
ctrJ.1 <- PrettyBarPlot2(signed.ctrJ[,laDim],
threshold = 1 / NROW(signed.ctrJ),
font.size = fontsize,
signifOnly = signifOnly,
horizontal = horizontal,
color4bar = col, # we need hex code
main = 'Variable Contributions (Signed)',
ylab = paste0('Contributions Barplot',
laDim),
ylim = c(1.2*min(signed.ctrJ),
1.2*max(signed.ctrJ))
) + ggtitle("Contribution",subtitle = paste0('Component ', laDim))
### plot contributions for component 2
laDim =2
ctrJ.2 <- PrettyBarPlot2(signed.ctrJ[,laDim],
threshold = 1 / NROW(signed.ctrJ),
font.size = fontsize,
color4bar = col, # we need hex code
signifOnly =signifOnly,
horizontal = horizontal,
main = 'Variable Contributions (Signed)',
ylab = paste0('Contributions Barplot',laDim),
ylim = c(1.2*min(signed.ctrJ),1.2*max(signed.ctrJ))
)+ ggtitle("",subtitle = paste0('Component ', laDim))
laDim =3
ctrJ.3 <- PrettyBarPlot2(signed.ctrJ[,laDim],
threshold = 1 / NROW(signed.ctrJ),
font.size = fontsize,
color4bar = col, # we need hex code
signifOnly = signifOnly,
horizontal = horizontal,
main = 'Variable Contributions (Signed)',
ylab = paste0('Contributions Barplot',laDim),
ylim = c(1.2*min(signed.ctrJ),
1.2*max(signed.ctrJ))
)+ ggtitle("",subtitle = paste0('Component ', laDim))
### Bootstrap of columns vectors
BR <- boot.ratios
laDim = 1
# Plot the bootstrap ratios for Dimension 1
ba001.BR1 <- PrettyBarPlot2(BR[,laDim],
threshold = 2,
font.size = fontsize,
signifOnly = signifOnly,
horizontal = horizontal,
color4bar = col, # we need hex code
ylab = 'Bootstrap ratios',
ylim = c(1.2*min(BR[,laDim]),
1.2*max(BR[,laDim]))
) + ggtitle("Bootstrap ratios",
subtitle = paste0('Component ', laDim))
# Plot the bootstrap ratios for Dimension 2
laDim = 2
ba002.BR2 <- PrettyBarPlot2(BR[,laDim],
threshold = 2,
font.size = fontsize,
signifOnly = signifOnly,
horizontal = horizontal,
color4bar = col, # we need hex code
ylab = 'Bootstrap ratios',
ylim = c(1.2*min(BR[,laDim]),
1.2*max(BR[,laDim]))
) + ggtitle("",subtitle = paste0('Component ', laDim))
laDim = 3
ba003.BR3 <- PrettyBarPlot2(BR[,laDim],
threshold = 2,
font.size = fontsize,
signifOnly = signifOnly,
horizontal = horizontal,
color4bar = col,
ylab = 'Bootstrap ratios',
ylim = c(1.2*min(BR[,laDim]),
1.2*max(BR[,laDim]))
) + ggtitle("",subtitle = paste0('Component ', laDim))
if(fig == 2){
grid.arrange(
as.grob(ctrJ.1),
as.grob(ctrJ.2),
as.grob(ba001.BR1),
as.grob(ba002.BR2),
ncol = 2,nrow = 2,
top = textGrob("Barplots for variables",
gp = gpar(fontsize = 18, font = 3))
)
}
if(fig==3){
grid.arrange(
as.grob(ctrJ.1),
as.grob(ctrJ.2),
as.grob(ctrJ.3),
as.grob(ba001.BR1),
as.grob(ba002.BR2),
as.grob(ba003.BR3),
ncol = 2,nrow = 3,
top = textGrob("Barplots for variables",
gp = gpar(fontsize = 18, font = 3))
)
}
}
2.8 Bins Helper Functions
Required parameters: quantitative data
Output: qualitative data with trisection
bins_helper <- function(DATA, name = "Variable") {
data <- as.data.frame(DATA)
tp <- quantile(as.numeric(data$DATA),seq(0,1,1/3))
ac = as.data.frame(table(data))
if (tp[3] < tp[4]){
if (ac[1,"Freq"] > 3){low <- which(data< tp[2])
high <- which(data > tp[3])
medium <- which(data >= tp[2] & data <=tp[3])
data[low,] <- 0
data[medium,] <- 1
data[high,] <-2
}else{ low <- which(data <= tp[2])
high <- which(data > tp[3])
medium <- which(data > tp[2] & data <=tp[3])
data[low,] <- 0
data[medium,] <- 1
data[high,] <-2}
}else{low <-which(data < tp[2])
high <- which(data >= tp[3])
medium <- which(data >= tp[2] & data <tp[3])
data[low,] <- 0
data[medium,] <- 1
data[high,] <-2}
a.c = as.data.frame(table(data))
colnames(a.c) <- c("Value","Freq")
rownames(a.c) <- c("Level_1", "Level_2", "Level_3")
#print(a.c)
spearman.col.score <- cor(DATA, as.numeric(data$DATA),
method = "spearman")
cat(name, "spearman r:", spearman.col.score, "\n")
return(data)
}
2.9 Plotting Latent Variable Plot
Required parameters: DESIGN, lx(from PLS), ly(from PLS), dimension(default = 1)
Output: LV plot
plot.lv <- function(DESIGN, lx, ly, d=1 ){
color.dot <- as.matrix(DESIGN)
na.de <- as.matrix(levels(DESIGN))
num.de <- length(na.de)
for (i in 1: num.de){
color.dot[which(color.dot == na.de[i])] <- index[i]
}
color.dot <- as.matrix(color.dot)
rownames(color.dot) <- DESIGN
latvar <- cbind(lx[,d],ly[,d])
colnames(latvar) <- c(paste0("Lx ", d),
paste0("Ly ", d))
# compute means
lv.group <- getMeans(latvar,DESIGN)
# get bootstrap intervals of groups
lv.group.boot <- Boot4Mean(latvar, DESIGN)
colnames(lv.group.boot$BootCube) <- c(paste0("Lx ", d),
paste0("Ly ", d))
plot.lv <- createFactorMap(latvar,
col.points = color.dot,
col.labels = color.dot,
title = c("Latent Variable Map", d),
alpha.points = 0.2
)
ind <- c(1:num.de)
for (i in 1:num.de){
inde <- match(rownames(color.dot),
sort(rownames(lv.group)))
ind[i] <- which(inde == i)[1]
}
grp.ind <- ind
plot.mean <- createFactorMap(lv.group,
col.points = color.dot[grp.ind],
col.labels = color.dot[grp.ind],
cex = 4,
pch = 17,
alpha.points = 0.8)
plot.meanCI <- MakeCIEllipses(lv.group.boot$BootCube[,c(1:2),],
col = color.dot[grp.ind],
names.of.factors = c(paste0("Lx ", d),
paste0("Ly ", d)))
plot.lv.pls <- plot.lv$zeMap_background +
plot.meanCI +
plot.lv$zeMap_dots +
plot.mean$zeMap_dots +
plot.mean$zeMap_text
plot.lv.pls
}
2.10 Plotting Partial Factor Scores
Required parameters: DESIGN, fs(factor scores), eigs(eigenvalue), tau(explained variance), dimension(default = 1), partial.fi.array(partial matrix), mm(how many sub data sets)
Output: Partial Factor Scores Map
plot.partial.fs <- function(DESIGN, fs,
eigs, tau,
d, partial.fi.array, mm ){
# get some colors
color.dot <- as.matrix(DESIGN)
na.de <- as.matrix(levels(DESIGN))
num.de <- length(na.de)
for (i in 1: num.de){
color.dot[which(color.dot == na.de[i])] <- index[i]
}
color.dot <- as.matrix(color.dot)
rownames(color.dot) <- DESIGN
# Factor Map
factor.graph.out <- createFactorMap(
X = fs,
axis = d, axis2 = (d+1),
col.points = color.dot,
col.labels = color.dot,
title = "MFA_Partial_Factor_Map",
display.labels = FALSE,
alpha.points = 0.3,
alpha.labels = 0.3
)
# Labels of Factor Map
labels4RV <- createxyLabels.gen(d, (d+1),
lambda = eigs,
tau = tau,
axisName = "Dimension "
)
# Groups
fm.tmp <- aggregate(fs, list(DESIGN), mean)
fm <- fm.tmp[,2:ncol(fm.tmp)]
rownames(fm) <- fm.tmp[,1]
ind <- c(1:num.de)
for (i in 1:num.de){
inde <- match(rownames(color.dot), sort(rownames(fm)))
ind[i] <- which(inde == i)[1]
}
grp.ind <- ind
rownames(fm[sort(rownames(fm)),]) <- sub("[[:punct:]]","",
rownames(fm[sort(rownames(fm)),]))
MapGroup <- PTCA4CATA::createFactorMap(fm[sort(rownames(fm)),],
axis1 = d,
axis2 = (d+1),
constraints = factor.graph.out$constraints,
col.points = color.dot[grp.ind],
cex = 4, # size of the dot (bigger)
col.labels = color.dot[grp.ind],
text.cex = 4,
alpha.points = 0.5)
# partial factor
partial.fi.array <- partial.fi.array
MFA.fi.groups.means <- PTCA4CATA::getMeans(
fs,
DESIGN)
colnames(MFA.fi.groups.means) <- paste0("Dimension ",
1:ncol(MFA.fi.groups.means))
# new array
MFA.partial.fi.groups.means <- array(0,dim=c(nrow(MFA.fi.groups.means),
ncol(MFA.fi.groups.means),
mm))
# for loop to calculate mean
for (i in 1:mm){
MFA.partial.fi.groups.means[,,i] <- as.matrix(PTCA4CATA::getMeans(
partial.fi.array[,,i],
DESIGN))
}
# setting dimnames
dimnames(MFA.partial.fi.groups.means) <- list(rownames(MFA.fi.groups.means),
colnames(MFA.fi.groups.means),
paste0(LETTERS[1:mm], 1:mm))
map4PFS <- createPartialFactorScoresMap(
factorScores = MFA.fi.groups.means,
partialFactorScores = MFA.partial.fi.groups.means,
axis1 = d, axis2 = (d+1),
colors4Items = color.dot,
colors4Blocks = ,
size.points = 1.5,
alpha.points = 0.7,
alpha.lines = 0.9,
size.labels = 3,
names4Partial = dimnames(MFA.partial.fi.groups.means)[[3]],
font.labels = 'bold')
partialFS.map.a <- factor.graph.out$zeMap_background +
factor.graph.out$zeMap_dots +
factor.graph.out$zeMap_text +
MapGroup$zeMap_dots +
MapGroup$zeMap_text +
map4PFS$mapColByItems + labels4RV
partialFS.map.b <- factor.graph.out$zeMap_background +
map4PFS$mapColByBlocks +
labels4RV +
MapGroup$zeMap_dots +
MapGroup$zeMap_text
partialFS.map.b
}