Chapter 2 Functions Introduction

In this Chapter, I list some of important functions for plotting. These functions are mainly based on packages PTCA4CATA and data4PCCAR, which are developed by Dr. Herve Abdi and his team at The University of Texas at Dallas. These functions will provide computational convenience to my plotting and visualization of my results. It is worth to mention that these functions are designed to generate plots so users should conduct MSA before using these functions. For how to conduct MSA in R, please see each chapter in this book: PCA⁴, CA⁵, MCA⁸, BADA⁷, DiCA⁶, PLS-C⁹, DiSTATIS¹⁰ and MFA¹¹. For further information about the two basic packages, please visit Dr. Abdi’s github

2.1 Plotting Scree plot

The first function is designed to plot scree plot:

Required parameters: eigs(eigenvalue), p.vals(inference p values)

Output: Scree plot

plot.scree <- function(eigs, p.vals){
  PlotScree(ev = eigs,
  p.ev = p.vals,
  plotKaiser = TRUE,
  title = "Explained Variance per Dimension")
}

2.2 Plotting Permutation test results

The second function is designed to plot the permutation histogram:

Required parameters: eigs.perm(eigenvalue.permutation), eigs(eigenvalue), para1(x_lim for X axis, default = 5), para2(intervals of distribution, default = 20), Dim(number of dimension, default = 1).

Output: Permutation Plots

plot.permutation <- function(eigs.perm, eigs, 
                             para1 = 5,  para2 = 20, 
                             Dim = 1){
  zeDim = Dim
  pH <- prettyHist(
    distribution = eigs.perm[,zeDim], 
    observed = eigs[zeDim], 
    xlim = c(0,para1), # needs to be set by hand
    breaks = para2,
    border = "gray", 
    main = paste0("Permutation Test for Eigenvalue ",zeDim),
    xlab = paste0("Eigenvalue ",zeDim), 
    ylab = "", 
    counts = FALSE, 
    cutoffs = c( 0.975))
}

2.3 Plotting Heatmap

Required parameters: data1(data set 1), data2(data set 2), xcol(color of labels in X axis), ycol(color of labels in X axis), textsize(font size of the labels, default = 5)

Output: Heatmap of correlation between two data sets.

plot.heatmap <- function(data1, data2, 
                         DATA=NULL, xcol = NULL, 
                         ycol = NULL, textsize = 5, 
                         scale = TRUE ){
  if(is.null(DATA)){
  data1 <- as.matrix(data1)
  data2 <- as.matrix(data2)
  DATA = cor(data1, data2)
  }
  if(is.null(xcol)){
    xcol <- prettyGraphsColorSelection(
      n.colors = ncol(DATA))
  }
  if(is.null(ycol)){
    ycol <- prettyGraphsColorSelection(
      n.colors = nrow(DATA))
  }
  hmap <- superheat(DATA, 
            heat.pal = c("brown", "white","red4"),
            left.label.size = 0.5,
            bottom.label.size = 0.5,
            bottom.label.text.angle = 90,
            left.label.text.alignment = "right",
            bottom.label.text.alignment = "right", 
            bottom.label.text.col = xcol,
            left.label.text.col = ycol,
            grid.vline = FALSE,
            grid.hline = FALSE, 
            bottom.label.col = "white", 
            left.label.col = "white", 
            scale = scale,
            left.label.text.size = textsize,
            bottom.label.text.size = textsize,
            )
}

2.4 Plotting Row Factor Scores

Required parameters: DESIGN(the grouping design of observation, such as Lakers is group1 and Celtics is group2), fs(factor scores), eigenvalues, tau(Kendall rank correlation coefficient), d(dimension, default = 1), mode(do you want “hull” or “CI”?), method(The string you want to add on your graph’s title).

Output: Global row factor scores map for each observation

plot.fs <- function(DESIGN, fs, eigs, 
                    tau, d = 1, mode="CI",
                    method = " ")
{
  fs <- as.matrix(fs)
  # GETMEANS
  fm.tmp <- aggregate(fs, list(DESIGN), mean)
  fm <- fm.tmp[,2:ncol(fm.tmp)]
  rownames(fm) <- fm.tmp[,1]
  tau <- round(tau)
  color.dot <- as.matrix(DESIGN)
  na.de <- as.matrix(levels(DESIGN))
  num.de <- length(na.de)
  for (i in 1: num.de){
    color.dot[which(color.dot == na.de[i])] <- index[i]
  }
  color.dot <- as.matrix(color.dot)
  rownames(color.dot) <- DESIGN
  fs.plot <- createFactorMap(fs, # data
                           title = paste0("Factor Scores_",
                                          method), 
                           alpha.points = 0.3,
                           axis1 = d, axis2 = (d+1), 
                           pch = 19, 
                           cex = 2,
                           text.cex = 2.5, 
                           display.labels = FALSE,
                           col.points = color.dot, 
                           col.labels =  color.dot
 )
  fs.label <- createxyLabels.gen(d,(d+1),
                               lambda = eigs,
                               tau = tau,
                               axisName = "Component "
 )
  ind <- c(1:num.de)
  for (i in 1:num.de){
    inde <- match(rownames(color.dot), 
                  sort(rownames(fm)))
    ind[i] <- which(inde == i)[1]
  }
  grp.ind <- ind
  rownames(fm[sort(rownames(fm)),]) <- sub("[[:punct:]]","",
                                       rownames(fm[sort(rownames(fm)),]))
  
  graphs.means <- PTCA4CATA:: createFactorMap(fm[sort(rownames(fm)),],
                                 axis1 = d, axis2 =(d+1), 
                                 constraints = fs.plot$constraints,
                                 col.points = color.dot[grp.ind],
                                 col.labels = color.dot[grp.ind],
                                 alpha.points = 0.9)
  
  BootCube <- PTCA4CATA::Boot4Mean(fs, 
                                 design = as.factor(DESIGN), 
                                 niter = 100)

  dimnames(BootCube$BootCube)[[2]] <- paste0("dim ", 
                                      1:dim(BootCube$BootCube)[[2]])
  
  boot.elli <- MakeCIEllipses(data =
                                BootCube$BootCube[,d:(d+1),][sort(rownames(BootCube$BootCube[,d:(d+1),])),,],
                  names.of.factors = 
                  c(paste0("Dimension ", d),paste0("Dimension ", 
                  (d+1))),
                  col = color.dot[grp.ind],
                  alpha.line = 0.3,
                  alpha.ellipse = 0.3
 )
  
  # with Hull
  colnames(fs) <- paste0('Dimension ', 1:ncol(fs))
  # getting the color correct: an ugly trick
  #col.groups <- as.data.frame(col.groups)
  DESIGN <- factor(DESIGN, levels = DESIGN[grp.ind])
  GraphHull <- PTCA4CATA::MakeToleranceIntervals(fs, 
               axis1 = d, 
               axis2 = (d+1),
               design = DESIGN,
               col = color.dot[grp.ind],
               names.of.factors =  c(paste0("Dim ", d),
                                     paste0("Dim 2", (d+1))),
               p.level = 1.00)
  
  if (mode == "hull"){
  factor.map <- fs.plot$zeMap_background +
    GraphHull + 
    fs.plot$zeMap_dots + 
    fs.label +  
    graphs.means$zeMap_dots + 
    graphs.means$zeMap_text}
  if (mode == "CI"){
    factor.map <- fs.plot$zeMap_background +
    boot.elli + 
    fs.plot$zeMap_dots + 
    fs.label +  
    graphs.means$zeMap_dots + 
    graphs.means$zeMap_text}
  factor.map
}

2.5 Plotting Column Plot

Required parameters: fj(column factor scores), eigs(eigenvalue), tau(Kendall rank correlation coefficient), d(dimension, default = 1), col(color coding of variables), method(title names)

Output: column factor scores plot.

plot.cfs <- function(fj, eigs, 
                     tau, d = 1, 
                     col = NULL, 
                     method = " ", 
                     colrow = "row")
  {
  if(colrow == "row"){
    cr <- nrow(fj)
  }
  if(colrow == "col"){
    cr <- ncol(fj)
  }
  if(is.null(col)){
    col <- prettyGraphs::prettyGraphsColorSelection(cr)
  }
  fj.labels <- createxyLabels.gen(d,(d+1),
                             lambda = eigs,
                             tau = round(tau),
                             axisName = "Component "
                             )
  fj.plot <- createFactorMap(fj, # data
               title = paste0("Factor Scores", method),
               axis1 = d, 
               axis2 = (d+1), 
               pch = 19, 
               cex = 2, 
               text.cex = 3, 
               col.points = col, 
               col.labels = col 
                            )
  column.factor.map <- fj.plot$zeMap_background + 
    fj.plot$zeMap_dots + 
    fj.plot$zeMap_text + 
    fj.labels
  column.factor.map
}

2.6 Plotting Loading Plot

Required parameters: data(raw data), col(color coding), fs(factor scores), eigs(eigenvalue), tau(Kendall rank correlation coefficient), d(dimension, default = 1)

Output: Loading plot for each variable.

plot.loading <- function(data, col=NULL, fs, eigs, tau, d=1){
  if (is.null(col)){
    col <- prettyGraphsColorSelection(n.colors = ncol(data))
  }
  cor.loading <- cor(data, fs)
  rownames(cor.loading) <- rownames(cor.loading)
  fi.labels <- createxyLabels.gen(d,(d+1),
                     lambda = eigs,
                     tau =  round(tau),
                     axisName = "Component "
  )
  loading.plot <- createFactorMap(cor.loading,
                     axis1 = d, 
                     axis2 = (d+1),
                     constraints = list(minx = -1, miny = -1,
                                         maxx = 1, maxy = 1),
                     col.points = col,
                     col.labels = col)
  LoadingMapWithCircles <-  loading.plot$zeMap_background + 
    loading.plot$zeMap_dots +
    addArrows(cor.loading[,d:(d+1)], color = col) +
    addCircleOfCor() +
    loading.plot$zeMap_text +  
    fi.labels
  LoadingMapWithCircles
}

2.7 Plotting Contribution Barplot

Required parameters: cj(contribution of each variable), fj(factor scores of each variable), col(color coding), boot.ratios(bootstrap ratio), signiOnly(choose to only display sigificant contribution or not, default = FALSE), fig(how many plots to display, default = 2)

Output: Combination of contribution barplots.

plot.cb <- function(cj, fj, col = NULL, 
                    boot.ratios, signifOnly = FALSE, 
                    fig = 2, horizontal = FALSE, 
                    colrow = "col", fontsize = 3)
  {
  if (colrow == "col"){
    cr <- ncol(cj)
  }
  if (colrow == "row"){
    cr <- nrow(cj)
  }
  # Contribution Plot
  ### plot contributions for component 1
  if (is.null(col)){
      col <- prettyGraphs::prettyGraphsColorSelection(cr)
  }
  signed.ctrJ <- cj * sign(fj)
  laDim = 1
  ctrJ.1 <- PrettyBarPlot2(signed.ctrJ[,laDim],
            threshold = 1 / NROW(signed.ctrJ),
            font.size = fontsize,
            signifOnly = signifOnly,
            horizontal = horizontal,
            color4bar = col, # we need hex code
            main = 'Variable Contributions (Signed)',
            ylab = paste0('Contributions Barplot',
                           laDim),
            ylim = c(1.2*min(signed.ctrJ), 
            1.2*max(signed.ctrJ))
    ) + ggtitle("Contribution",subtitle = paste0('Component ', laDim))
  
  ### plot contributions for component 2
  laDim =2
  ctrJ.2 <- PrettyBarPlot2(signed.ctrJ[,laDim],
               threshold = 1 / NROW(signed.ctrJ),
               font.size = fontsize,
               color4bar = col, # we need hex code
               signifOnly =signifOnly,
               horizontal = horizontal,
               main = 'Variable Contributions (Signed)',
               ylab = paste0('Contributions Barplot',laDim),
               ylim = c(1.2*min(signed.ctrJ),1.2*max(signed.ctrJ))
  )+ ggtitle("",subtitle = paste0('Component ', laDim))
  laDim =3
  ctrJ.3 <- PrettyBarPlot2(signed.ctrJ[,laDim],
                  threshold = 1 / NROW(signed.ctrJ),
                  font.size = fontsize,
                  color4bar = col, # we need hex code
                  signifOnly = signifOnly,                             
                  horizontal = horizontal,
                  main = 'Variable Contributions (Signed)',
                  ylab = paste0('Contributions Barplot',laDim),
                  ylim = c(1.2*min(signed.ctrJ), 
                                    1.2*max(signed.ctrJ))
  )+ ggtitle("",subtitle = paste0('Component ', laDim))
  

  
  ### Bootstrap of columns vectors
  BR <- boot.ratios
  laDim = 1
  
  # Plot the bootstrap ratios for Dimension 1
  ba001.BR1 <- PrettyBarPlot2(BR[,laDim],
                 threshold = 2,
                 font.size = fontsize,
                 signifOnly = signifOnly,
                 horizontal = horizontal,
                 color4bar = col, # we need hex code
                 ylab = 'Bootstrap ratios',
                 ylim = c(1.2*min(BR[,laDim]), 
                                       1.2*max(BR[,laDim]))
  ) + ggtitle("Bootstrap ratios", 
              subtitle = paste0('Component ', laDim))
  
  # Plot the bootstrap ratios for Dimension 2
  laDim = 2
  ba002.BR2 <- PrettyBarPlot2(BR[,laDim],
                 threshold = 2,
                 font.size = fontsize,
                 signifOnly = signifOnly,                             
                 horizontal = horizontal,
                 color4bar = col, # we need hex code
                 ylab = 'Bootstrap ratios',
                 ylim = c(1.2*min(BR[,laDim]), 
                          1.2*max(BR[,laDim]))
  ) + ggtitle("",subtitle = paste0('Component ', laDim))
  
  laDim = 3
  ba003.BR3 <- PrettyBarPlot2(BR[,laDim],
                              threshold = 2,
                              font.size = fontsize,
                              signifOnly = signifOnly,                             
                              horizontal = horizontal,
                              color4bar = col,
                              ylab = 'Bootstrap ratios',
                              ylim = c(1.2*min(BR[,laDim]), 
                                       1.2*max(BR[,laDim]))
  ) + ggtitle("",subtitle = paste0('Component ', laDim))
  
  if(fig == 2){
    grid.arrange(
    as.grob(ctrJ.1),
    as.grob(ctrJ.2),
    as.grob(ba001.BR1),
    as.grob(ba002.BR2),
    ncol = 2,nrow = 2,
    top = textGrob("Barplots for variables", 
                   gp = gpar(fontsize = 18, font = 3))
  )
  }
  if(fig==3){
    grid.arrange(
    as.grob(ctrJ.1),
    as.grob(ctrJ.2),
    as.grob(ctrJ.3),
    as.grob(ba001.BR1),
    as.grob(ba002.BR2),
    as.grob(ba003.BR3),
    ncol = 2,nrow = 3,
    top = textGrob("Barplots for variables", 
                   gp = gpar(fontsize = 18, font = 3))
  )
  }
}

2.8 Bins Helper Functions

Required parameters: quantitative data

Output: qualitative data with trisection

bins_helper <- function(DATA, name = "Variable") {
  data <- as.data.frame(DATA)
  tp <- quantile(as.numeric(data$DATA),seq(0,1,1/3))
  ac = as.data.frame(table(data))
  if (tp[3] < tp[4]){
    if (ac[1,"Freq"] > 3){low <- which(data< tp[2])
    high <- which(data > tp[3])
    medium <- which(data  >= tp[2] & data <=tp[3])
    data[low,] <- 0
    data[medium,] <- 1
    data[high,] <-2
    }else{ low <- which(data <= tp[2])
      high <- which(data > tp[3])
      medium <- which(data > tp[2] & data <=tp[3])
      data[low,] <- 0
      data[medium,] <- 1
      data[high,] <-2}
  }else{low <-which(data < tp[2])
    high <- which(data >= tp[3])
    medium <- which(data >= tp[2] & data <tp[3]) 
    data[low,] <- 0
    data[medium,] <- 1
    data[high,] <-2}
  a.c = as.data.frame(table(data))
  colnames(a.c) <- c("Value","Freq")
  rownames(a.c) <- c("Level_1", "Level_2", "Level_3")
  #print(a.c)
  spearman.col.score <- cor(DATA, as.numeric(data$DATA), 
                            method = "spearman")
  cat(name, "spearman r:", spearman.col.score, "\n")
  return(data)
}

2.9 Plotting Latent Variable Plot

Required parameters: DESIGN, lx(from PLS), ly(from PLS), dimension(default = 1)

Output: LV plot

plot.lv <- function(DESIGN, lx, ly, d=1 ){
  color.dot <- as.matrix(DESIGN)
  na.de <- as.matrix(levels(DESIGN))
  num.de <- length(na.de)
  for (i in 1: num.de){
    color.dot[which(color.dot == na.de[i])] <- index[i]
  }
  color.dot <- as.matrix(color.dot)
  rownames(color.dot) <- DESIGN
  latvar <- cbind(lx[,d],ly[,d])
  colnames(latvar) <- c(paste0("Lx ", d), 
                        paste0("Ly ", d))
  
  # compute means
  lv.group <- getMeans(latvar,DESIGN)
  
  # get bootstrap intervals of groups
  lv.group.boot <- Boot4Mean(latvar, DESIGN)
  colnames(lv.group.boot$BootCube) <- c(paste0("Lx ", d), 
                                        paste0("Ly ", d))
  
  
  plot.lv <- createFactorMap(latvar,
             col.points = color.dot,
             col.labels = color.dot,
             title = c("Latent Variable Map", d),
                       alpha.points = 0.2
  )
  ind <- c(1:num.de)
  for (i in 1:num.de){
    inde <- match(rownames(color.dot), 
                  sort(rownames(lv.group)))
    ind[i] <- which(inde == i)[1]
  }
  grp.ind <- ind
  plot.mean <- createFactorMap(lv.group,
             col.points = color.dot[grp.ind],
             col.labels = color.dot[grp.ind],
             cex = 4,
             pch = 17,
             alpha.points = 0.8)
  
  plot.meanCI <- MakeCIEllipses(lv.group.boot$BootCube[,c(1:2),], 
                     col = color.dot[grp.ind],
                     names.of.factors = c(paste0("Lx ", d), 
                                          paste0("Ly ", d)))
  
  plot.lv.pls <- plot.lv$zeMap_background + 
    plot.meanCI + 
    plot.lv$zeMap_dots + 
    plot.mean$zeMap_dots + 
    plot.mean$zeMap_text

  plot.lv.pls
}

2.10 Plotting Partial Factor Scores

Required parameters: DESIGN, fs(factor scores), eigs(eigenvalue), tau(explained variance), dimension(default = 1), partial.fi.array(partial matrix), mm(how many sub data sets)

Output: Partial Factor Scores Map

plot.partial.fs <- function(DESIGN, fs, 
                            eigs, tau, 
                            d, partial.fi.array, mm ){
  # get some colors
  color.dot <- as.matrix(DESIGN)
  na.de <- as.matrix(levels(DESIGN))
  num.de <- length(na.de)
  for (i in 1: num.de){
    color.dot[which(color.dot == na.de[i])] <- index[i]
  }
  color.dot <- as.matrix(color.dot)
  rownames(color.dot) <- DESIGN

  # Factor Map
  factor.graph.out <- createFactorMap(
    X = fs,
    axis = d, axis2 = (d+1),
    col.points = color.dot,
    col.labels = color.dot,
    title = "MFA_Partial_Factor_Map",
    display.labels = FALSE,
    alpha.points = 0.3,
    alpha.labels = 0.3
  )
  # Labels of Factor Map
  labels4RV <- createxyLabels.gen(d, (d+1),
    lambda = eigs,
    tau = tau,
    axisName = "Dimension "
  )
  # Groups
  fm.tmp <- aggregate(fs, list(DESIGN), mean)
  fm <- fm.tmp[,2:ncol(fm.tmp)]
  rownames(fm) <- fm.tmp[,1]
  ind <- c(1:num.de)
  for (i in 1:num.de){
    inde <- match(rownames(color.dot), sort(rownames(fm)))
    ind[i] <- which(inde == i)[1]
  }
  grp.ind <- ind
  rownames(fm[sort(rownames(fm)),]) <- sub("[[:punct:]]","",
                      rownames(fm[sort(rownames(fm)),]))
  
  MapGroup <- PTCA4CATA::createFactorMap(fm[sort(rownames(fm)),],
                            axis1 = d,
                            axis2 = (d+1),
                            constraints = factor.graph.out$constraints,
                            col.points = color.dot[grp.ind],
                            cex = 4,  # size of the dot (bigger)
                            col.labels = color.dot[grp.ind],
                            text.cex = 4,
                            alpha.points = 0.5)
    
  # partial factor
  partial.fi.array <- partial.fi.array
  MFA.fi.groups.means <- PTCA4CATA::getMeans(
    fs, 
    DESIGN)
  colnames(MFA.fi.groups.means) <- paste0("Dimension ", 
                          1:ncol(MFA.fi.groups.means))
  # new array
  MFA.partial.fi.groups.means <- array(0,dim=c(nrow(MFA.fi.groups.means),
                                               ncol(MFA.fi.groups.means), 
                                               mm))
  # for loop to calculate mean
  for (i in 1:mm){
    MFA.partial.fi.groups.means[,,i] <- as.matrix(PTCA4CATA::getMeans(
      partial.fi.array[,,i], 
      DESIGN))
  }
  # setting dimnames
  dimnames(MFA.partial.fi.groups.means) <- list(rownames(MFA.fi.groups.means),
                                                colnames(MFA.fi.groups.means), 
                                                paste0(LETTERS[1:mm], 1:mm))
  
  map4PFS <- createPartialFactorScoresMap(
    factorScores = MFA.fi.groups.means,
    partialFactorScores = MFA.partial.fi.groups.means,
    axis1 = d, axis2 = (d+1),
    colors4Items = color.dot,
    colors4Blocks = ,
    size.points = 1.5,
    alpha.points = 0.7,
    alpha.lines = 0.9,
    size.labels = 3,
    names4Partial = dimnames(MFA.partial.fi.groups.means)[[3]],
    font.labels = 'bold')
  
  partialFS.map.a <- factor.graph.out$zeMap_background + 
    factor.graph.out$zeMap_dots +
    factor.graph.out$zeMap_text +
    MapGroup$zeMap_dots + 
    MapGroup$zeMap_text + 
    map4PFS$mapColByItems + labels4RV
  partialFS.map.b <- factor.graph.out$zeMap_background + 
    map4PFS$mapColByBlocks + 
    labels4RV + 
    MapGroup$zeMap_dots + 
    MapGroup$zeMap_text

  partialFS.map.b
}