rENA coverage - 56.50%

Files
Source

#' Move ENA nodes onto a common circle
#'
#' Rescales every non-zero node position, in the plane spanned by the two
#' chosen dimensions, so that all of them have the length of the longest node
#' vector (the circle radius is that maximum length; nodes at the origin are
#' left in place). The centroids in `set$model$centroids` are then recomputed
#' from the connection counts and the moved nodes.
#'
#' @param set A list-like ENA set providing `rotation$nodes`, `rotation$codes`,
#'   `connection.counts` and `model$centroids`.
#' @param dimension_name_1 Name of the node column used as the first axis.
#'   Defaults to the first column name of `as.matrix(set$rotation$nodes)`.
#' @param dimension_name_2 Name of the node column used as the second axis.
#'   Defaults to the second column name of `as.matrix(set$rotation$nodes)`.
#'
#' @return `set` with `rotation$nodes` and `model$centroids` updated.
#' @export
move_nodes_to_unit_circle <- function(
    set,
    dimension_name_1 = colnames(as.matrix(set$rotation$nodes))[1],
    dimension_name_2 = colnames(as.matrix(set$rotation$nodes))[2]
) {
  # Length of each node vector in the selected two-dimensional plane.
  nodes_x <- set$rotation$nodes[[dimension_name_1]]
  nodes_y <- set$rotation$nodes[[dimension_name_2]]
  node_lengths <- sqrt(nodes_x^2 + nodes_y^2)

  # Stretch every non-zero node to the length of the longest one. which()
  # drops NA lengths, so a single NA node cannot poison the maximum.
  non_zero <- which(node_lengths != 0)
  if (length(non_zero) > 0) {
    stretch <- max(node_lengths[non_zero]) / node_lengths[non_zero]
    set$rotation$nodes[[dimension_name_1]][non_zero] <-
      as.numeric(nodes_x[non_zero] * stretch)
    set$rotation$nodes[[dimension_name_2]][non_zero] <-
      as.numeric(nodes_y[non_zero] * stretch)
  }

  # Node weights: each connection count is split evenly between its two codes.
  code_names <- set$rotation$codes
  row_counts <- set$connection.counts
  n_codes <- length(code_names)
  node_weights <- matrix(0, nrow = nrow(row_counts), ncol = n_codes)

  # seq_len() keeps the loop empty when there are fewer than two codes.
  for (i in seq_len(max(n_codes - 1L, 0L))) {
    for (j in (i + 1L):n_codes) {
      connection_name <- paste0(code_names[i], " & ", code_names[j])
      counts <- row_counts[[connection_name]]
      if (is.null(counts)) {
        stop("Connection column not found: ", connection_name)
      }
      half <- counts / 2
      node_weights[, i] <- node_weights[, i] + half
      node_weights[, j] <- node_weights[, j] + half
    }
  }

  # Normalise each row to sum to one; all-zero rows stay zero.
  row_totals <- rowSums(node_weights)
  weighted_rows <- which(row_totals != 0)
  node_weights[weighted_rows, ] <-
    node_weights[weighted_rows, , drop = FALSE] / row_totals[weighted_rows]

  # Centroids are the weighted averages of the (moved) node positions. They
  # are written starting at the second column of `set$model$centroids`; the
  # first column is left untouched.
  centroids <- node_weights %*% as.matrix(set$rotation$nodes)
  for (j in seq_len(ncol(centroids))) {
    set$model$centroids[, j + 1] <- centroids[, j]
  }

  return(set)
}


#' Move ENA nodes onto a common circle with equal angular spacing
#'
#' Places every non-zero node, in the plane spanned by the two chosen
#' dimensions, on the circle whose radius is the length of the longest node
#' vector. The longest node keeps its position; the remaining non-zero nodes
#' are placed counter-clockwise from it, in their existing angular order, at
#' equal angles of `2 * pi / k` (`k` = number of non-zero nodes). Nodes at the
#' origin are left in place. The centroids in `set$model$centroids` are then
#' recomputed from the connection counts and the moved nodes.
#'
#' @param set A list-like ENA set providing `rotation$nodes`, `rotation$codes`,
#'   `connection.counts` and `model$centroids`.
#' @param dimension_name_1 Name of the node column used as the first axis.
#'   Defaults to the first column name of `as.matrix(set$rotation$nodes)`.
#' @param dimension_name_2 Name of the node column used as the second axis.
#'   Defaults to the second column name of `as.matrix(set$rotation$nodes)`.
#'
#' @return `set` with `rotation$nodes` and `model$centroids` updated.
#' @export
move_nodes_to_unit_circle_with_equal_space <- function(
  set,
  dimension_name_1 = colnames(as.matrix(set$rotation$nodes))[1],
  dimension_name_2 = colnames(as.matrix(set$rotation$nodes))[2]
) {
  # Length of each node vector in the selected two-dimensional plane.
  nodes_x <- set$rotation$nodes[[dimension_name_1]]
  nodes_y <- set$rotation$nodes[[dimension_name_2]]
  node_lengths <- sqrt(nodes_x^2 + nodes_y^2)

  # Only nodes away from the origin are moved.
  non_zero <- which(node_lengths != 0)
  n_moved <- length(non_zero)

  if (n_moved > 0) {
    xs <- nodes_x[non_zero]
    ys <- nodes_y[non_zero]

    # Counter-clockwise ordering: upper half-plane from right to left, then
    # lower half-plane from left to right.
    upper <- which(ys >= 0)
    lower <- which(ys < 0)
    ccw_order <- c(
      upper[order(xs[upper], decreasing = TRUE)],
      lower[order(xs[lower], decreasing = FALSE)]
    )

    # The first node of maximal length is the anchor and keeps its position.
    anchor <- which.max(node_lengths[non_zero])
    first_slot <- which(ccw_order == anchor)[1]
    x1 <- xs[anchor]
    y1 <- ys[anchor]

    # Walk the circular ordering starting at the anchor; the i-th node visited
    # receives the anchor rotated by (i - 1) equal angular steps.
    steps <- seq_len(n_moved)
    slots <- ((first_slot + steps - 2) %% n_moved) + 1
    angles <- (steps - 1) * (2 * pi / n_moved)
    targets <- ccw_order[slots]
    xs[targets] <- x1 * cos(angles) - y1 * sin(angles)
    ys[targets] <- x1 * sin(angles) + y1 * cos(angles)

    set$rotation$nodes[[dimension_name_1]][non_zero] <- xs
    set$rotation$nodes[[dimension_name_2]][non_zero] <- ys
  }

  # Node weights: each connection count is split evenly between its two codes.
  code_names <- set$rotation$codes
  row_counts <- set$connection.counts
  n_codes <- length(code_names)
  node_weights <- matrix(0, nrow = nrow(row_counts), ncol = n_codes)

  # seq_len() keeps the loop empty when there are fewer than two codes.
  for (i in seq_len(max(n_codes - 1L, 0L))) {
    for (j in (i + 1L):n_codes) {
      connection_name <- paste0(code_names[i], " & ", code_names[j])
      counts <- row_counts[[connection_name]]
      if (is.null(counts)) {
        stop("Connection column not found: ", connection_name)
      }
      half <- counts / 2
      node_weights[, i] <- node_weights[, i] + half
      node_weights[, j] <- node_weights[, j] + half
    }
  }

  # Normalise each row to sum to one; all-zero rows stay zero.
  row_totals <- rowSums(node_weights)
  weighted_rows <- which(row_totals != 0)
  node_weights[weighted_rows, ] <-
    node_weights[weighted_rows, , drop = FALSE] / row_totals[weighted_rows]

  # Centroids are the weighted averages of the (moved) node positions. They
  # are written starting at the second column of `set$model$centroids`; the
  # first column is left untouched.
  centroids <- node_weights %*% as.matrix(set$rotation$nodes)
  for (j in seq_len(ncol(centroids))) {
    set$model$centroids[, j + 1] <- centroids[, j]
  }

  return(set);
}

#' @title with.ena.matrix
#' @description Evaluates an expression in a context built from `data`: every
#' column of `data` is available by name, together with a matrix `V` and any
#' extra named arguments passed through `...`.
#'
#' @param data An ENA matrix or data frame whose columns become variables.
#' @param expr An R expression to evaluate in that context.
#' @param ... Additional named values made available to `expr`. If a non-NULL
#'   `V` is supplied here it is used instead of `as.matrix(data)`.
#'
#' @details
#' - `V` is `as.matrix(data)` unless a custom `V` is given in `...` (in which
#'   case a notice is printed).
#' - Character columns are recoded as the integer codes of their factor levels.
#' - Names not found in the context are looked up in the caller's frame.
#'
#' @return The result of evaluating `expr` in the constructed context.
#'
#' @export
with.ena.matrix <- function(data, expr, ...) {
  extras <- list(...)

  # Decide which matrix is exposed as `V`.
  has_custom_v <- length(extras) > 0 && !is.null(extras$V)
  if (has_custom_v) {
    print("- using custom V matrix")
  }
  points_matrix <- if (has_custom_v) extras$V else as.matrix(data)

  # Expose every column; character columns become numeric factor codes.
  context <- lapply(unclass(data), function(column) {
    if (is.character(column)) as.numeric(as.factor(column)) else column
  })
  context$V <- points_matrix

  # Extra arguments are appended after the columns and `V`.
  eval(substitute(expr), c(context, extras), enclos = parent.frame())
}

###
#' @title ENA Rotate by regression (second way)
#'
#' @description This function allows the user to provide a regression formula for rotation on x and optionally on y.
#'    If a regression formula for y is not provided, svd is applied to the residual data deflated by x to get y coordinates.
#'    The regression formula should use ENA points as major predictors and a binary or numerical variable as dependent variable.
#'    Control and interaction variables are allowed to be included as predictors in the formula.
#'
#' @param enaset An \code{\link{ENAset}}
#' @param params list of parameters, may include:
#'     x_var: Regression formula for x direction (a formula, or a string that
#'     `formula()` can convert), such as `Condition ~ V + GameHalf + Condition : GameHalf`,
#'     where V always stands for the ENA points.
#'     y_var: Regression formula, similar to x_var for y direction (optional).
#'
#' @export
#' @return \code{\link{ENARotationSet}}
ena.rotate.by.hena.regression_2 = function( enaset, params ) {

  # check arguments
  if ( !is.list(params) || is.null(params$x_var) ) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  x <- formula(params$x_var);

  if (is.null(enaset$points.normed.centered)) {
    p <- as.matrix(enaset$model$points.for.projection);
  }
  else {
    p <- as.matrix(enaset$points.normed.centered);
  }

  # number of point dimensions
  n <- ncol(p);

  # Fit the regression described by `spec` with the projection points exposed
  # as `V` and their columns exposed by name.
  # NOTE(review): with.ena.matrix() always builds `V` from the data it is given
  # unless a `V` is passed through `...`, so both regressions run against
  # `enaset$model$points.for.projection`; the deflated matrix is NOT what the
  # y regression sees. Confirm whether that is intended before changing it.
  fit_direction <- function(spec) {
    with.ena.matrix(enaset$model$points.for.projection, {
      prm <- if(is.character(spec))
          spec
        else
          enquote(spec)
      ;
      vars <- all.vars(formula(prm));
      all_exists <- vapply(vars, function(x) x == "V" || exists(x), logical(1))
      if(!all(all_exists)) {
        stop(paste0("The following columns in the formula are not found in the unique metadata for the units: ", paste0(vars[!all_exists], collapse = ", ")))
      }
      lm(formula(prm));
    });
  }

  # Drop the intercept and keep the `n` coefficients that belong to the points:
  # entries 2..(n + 1) of a coefficient vector, or the second row of a
  # coefficient matrix (multi-response fit).
  point_coefficients <- function(coefs) {
    if(is.null(dim(coefs))) {
      coefs[2:(n+1)]
    }
    else {
      coefs[2,]
    }
  }

  # Scale to unit length; a zero vector is returned unchanged.
  unit_vector <- function(v) {
    v_norm <- sqrt(sum(v * v));
    if (v_norm != 0) {
      v <- v / v_norm;
    }
    v
  }

  # Label for a direction: second variable of the formula, else first coef name.
  direction_name <- function(frm, v) {
    second_var <- all.vars(frm)[2];
    if(is.na(second_var)) names(v)[1] else second_var
  }

  # regress to get v1 using x regression formula
  v1 <- unit_vector(point_coefficients(fit_direction(params$x_var)$coefficients));
  xName <- direction_name(x, v1);

  # Save v1
  R <- matrix(c(v1), ncol = 1);
  colnames(R) <- c(paste0(xName,"_reg"));

  # deflate matrix by x dimension
  defA <- p - p %*% v1 %*% t(v1);

  # if y formula is given, regress by y formula
  if (!is.null(params$y_var)) {
    y <- formula(params$y_var);

    # Same extraction rule as for v1, so covariates in the y formula or a
    # coefficient matrix no longer yield a vector of the wrong length.
    v2 <- unit_vector(point_coefficients(fit_direction(params$y_var)$coefficients));
    yName <- direction_name(y, v2);

    # save both v1 and v2
    R <- cbind(v1, v2);
    colnames(R) <- c(paste0(xName,"_reg"), paste0(yName,"_reg"));

    # deflate by v2
    defA <- defA - defA %*% v2 %*% t(v2);
  }

  # get svd for deflated points
  svd_result <- prcomp(defA, retx=FALSE, scale=FALSE, center=FALSE, tol=0);
  svd_v <- svd_result$rotation;

  # Merge rotation vectors: regression directions first, then as many leading
  # SVD directions as are needed to keep the original column count.
  vcount <- ncol(R);
  colNamesR <- colnames(R);
  n_svd <- max(ncol(svd_v) - vcount, 0);
  combined <- cbind(R, svd_v[, seq_len(n_svd), drop = FALSE]);
  colnames(combined) <- c(
    colNamesR,
    if (n_svd > 0) paste0("SVD", ((vcount + 1):ncol(combined)))
  );

  #create rotation set
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = NULL
  )

  return(rotation_set);
}


#' @title ENAplot Class
#'
#' @description
#' The ENAplot R6 class provides a structure for visualizing ENAset objects using plotly.
#' It encapsulates the ENAset data, the plotly visualization, and related plotting parameters.
#'
#' @section Fields:
#' \describe{
#'   \item{enaset}{The \code{\link{ENAset}} object from which the ENAplot was constructed.}
#'   \item{plot}{The plotly object used for data visualization.}
#'   \item{axes}{Axes information for the plot (TBD).}
#'   \item{point}{Point information for the plot (TBD).}
#'   \item{palette}{Color palette used for plotting (TBD).}
#'   \item{plotted}{Indicates whether the plot has been rendered (TBD).}
#' }
#'
#' @examples
#' # Example usage:
#' # enaplot <- ENAplot$new(enaset = myENAset)
#'
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export
#'
#' @field enaset - The \code{\link{ENAset}} object from which the ENAplot was constructed
#' @field title The plot title.
#' @field plot - The plotly object used for data visualization
#' @field axes A list or object specifying the axes configuration for the ENA plot, such as axis labels, limits, or scaling.
#' @field point A structure representing the data points to be plotted, including coordinates and visual properties.
#' @field palette A set of colors or a function defining the color scheme used for plotting elements in the ENA plot.
#' @field plotted A list recording what has been added to the plot (points, networks, trajectories, means).
#' @field showticklabels Logical. Indicates whether to show tick labels on the axes.
#' @field autosize Logical. Indicates whether the plot should automatically resize.
#' @field automargin Logical. Indicates whether the plot should automatically adjust margins.
#' @field axispadding Numeric. Padding factor for the axes.
ENAplot = R6::R6Class("ENAplot",

  public = list(

    ## Public Functions ----
      #' Create ENAplot
      #'
      #' @param enaset An ENA set object containing the data to be plotted.
      #' @param title The title of the plot.
      #' @param dimension.labels Labels for the dimensions shown in the plot.
      #' @param font.size Numeric value specifying the font size for plot text.
      #' @param font.color Color value for the plot text.
      #' @param font.family Font family to use for plot text.
      #' @param scale.to Either `"network"` (default, scale to the node positions),
      #'   `"points"` (scale to the points), a numeric value whose last element is
      #'   used as the axis maximum, or a list with optional `x`/`y` ranges.
      #' @param showticklabels Logical; whether to display axis tick labels (supplied via `...`).
      #' @param autosize Logical; whether the plot should automatically size itself (supplied via `...`).
      #' @param automargin Logical; whether the plot should automatically adjust margins (supplied via `...`).
      #' @param axispadding Numeric value specifying padding around axes (supplied via `...`).
      #' @param ... Additional arguments; `multiplier`, `point.size`, `ticks` and
      #'   `displayModeBar` are also read from here.
      #'
      #' @return ENAplot
      initialize = function(
        enaset = NULL,

        title = "ENA Plot",

        dimension.labels = c("",""),

        font.size = 14,
        font.color = "#000000",
        font.family = "Arial",
        scale.to = "network",
        ...
      ) {
        if (is(enaset, "ENAset")) {
          warning(paste0("Usage of ENAset objects will be deprecated ",
            "and potentially removed altogether in future versions."))

          enaset <- ena.set(enaset);
        }

        # Optional settings are read from `...`; anything not supplied keeps
        # the class default.
        args = list(...);
        if(!is.null(args$multiplier)) {
          private$multiplier = args$multiplier
        }
        if(!is.null(args$point.size)) {
          self$point$size = args$point.size
        }
        if(!is.null(args$showticklabels)) {
          self$showticklabels = args$showticklabels
        }
        if(!is.null(args$axispadding)) {
          self$axispadding = args$axispadding
        }
        if(!is.null(args$autosize)) {
          self$autosize = args$autosize
        }
        if(!is.null(args$automargin)) {
          self$automargin = args$automargin
        }

        # Keep copies of the set's tables so later changes to the plot's data
        # do not modify the caller's set by reference.
        self$enaset <- list(
          connection.counts = data.table::copy(enaset$connection.counts),
          meta.data = data.table::copy(enaset$meta.data),
          model = list(
            model.type = enaset$model$model.type,
            raw.input = data.table::copy(enaset$model$raw.input),
            row.connection.counts = data.table::copy(enaset$model$row.connection.counts),
            unit.labels = enaset$model$unit.labels,
            points.for.projection = data.table::copy(enaset$model$points.for.projection),
            centroids = data.table::copy(enaset$model$centroids),
            variance = enaset$model$variance
          ),
          points = data.table::copy(enaset$points),
          line.weights = data.table::copy(enaset$line.weights),
          rotation = list(
            adjacency.key = data.table::copy(enaset$rotation$adjacency.key),
            codes = enaset$rotation$codes,
            rotation.matrix = data.table::copy(enaset$rotation$rotation.matrix),
            center.vec = enaset$rotation$center.vec,
            nodes = data.table::copy(enaset$rotation$nodes)
          ),
          plots = list()
        );
        self$title <- title;

        private$dimension.labels <- dimension.labels;
        private$font.size <- font.size;
        private$font.color <- font.color;
        private$font.family <- font.family;
        private$font = list (
          size = private$font.size,
          color = private$font.color,
          family = private$font.family
        );
        self$plot <- plotly::plot_ly(
          mode = "markers",
          type ="scatter"
        );

        self$plot <- plotly::config(p = self$plot, displayModeBar = args$displayModeBar);

        # Axis ranges: explicit per-axis ranges from a list, otherwise a
        # symmetric range derived from the points, a number, or the nodes.
        if (is.list(scale.to)) {
          max.axis = max(abs(as.matrix(enaset$points)))*self$axispadding
          if(is.null(scale.to$x)) {
            axis.range.x = c(-max.axis, max.axis)
          }
          else {
            axis.range.x = scale.to$x
          }
          if(is.null(scale.to$y)) {
            axis.range.y = c(-max.axis, max.axis)
          }
          else {
            axis.range.y = scale.to$y
          }
        }
        else {
          if(is.character(scale.to) && scale.to == "points") {
            max.axis = max(abs(as.matrix(enaset$points)))*self$axispadding
          }
          else if (is.numeric(scale.to)) {
            max.axis = tail(scale.to, 1)
          }
          else {
            max.axis = max(abs(as.matrix(enaset$rotation$nodes)))*self$axispadding;
          }
          axis.range.x = axis.range.y = c(-max.axis, max.axis)
        }

        # NOTE(review): `showgrid` appears twice with conflicting values; which
        # one plotly honours is not evident from this file. Kept as-is, confirm
        # the intended value and drop the other entry.
        graph.axis <- list(
          titlefont = private$font,
          showgrid = FALSE,
          zeroline = TRUE,
          showticklabels = self$showticklabels,
          showgrid = TRUE
          # range=c(-max.axis,max.axis)
        );
        if(!is.null(args$ticks)) {
          graph.axis$showticklabels = TRUE;
          graph.axis$ticks = args$ticks$location;
          graph.axis$tickcolor = args$ticks$color;
          graph.axis$tickangle = args$ticks$angle;
        }
        self$axes$x = graph.axis
        self$axes$x$title = dimension.labels[1];
        self$axes$x$range = axis.range.x
        self$axes$y = graph.axis
        self$axes$y$title = dimension.labels[2];
        self$axes$y$range = axis.range.y

        self$plot = plotly::layout(
          self$plot,
          title =  title,
          xaxis = self$axes$x,
          yaxis = self$axes$y,
          autosize = self$autosize,
          font = list (
            size = 12,
            color = private$font.color,
            family = private$font.family
          )
        );
      },

      #' Print ENA plot
      #'
      #' @return The value returned by printing the plotly object in `plot`.
      print = function() {
        print(self$plot);
      },

      #' Get property from object
      #'
      #' @param x character key of the private member to retrieve
      #' @return value of the private member named `x`
      get = function(x) {
        return(private[[x]])
      },

    ## Public Properties ----
      enaset = NULL,
      title = "ENA Plot",
      plot = NULL,
      axes = list(
        x = NULL, y = NULL
      ),
      point = list(
        size = 5
      ),
      showticklabels = FALSE,
      autosize = FALSE,
      automargin = TRUE,
      axispadding = 1.2,
      palette = c("#386CB0", "#F0027F", "#7FC97F", "#BEAED4",
                  "#FDC086","#FFFF99", "#BF5B17"),
      plotted = list(
        points = list(), networks = list(),
        trajectories = list(), means = list()
      )
  ),

  private = list(
    ####
    ## Private Properties
    ####
      dimension.labels = c("X","Y"),

      font = list(),
      font.size = 14,
      font.color = "#000000",
      font.family = "Arial",
      #plot.color = I("black"),

      multiplier = 5
    ####
    ## END: Private Properties
    ####
  )
)

#' Plot an ena.set object
#'
#' Creates an ENA plot for the set and, unless `empty` is `TRUE`, adds the
#' set's points to it.
#'
#' @param x ena.set to plot
#' @param y ignored.
#' @param ... Additional parameters passed along to `ena.plot()` and, when
#'   `empty = FALSE`, to `add_points()`.
#' @param empty Logical; if TRUE, creates an empty plot without points. Default is TRUE.
#' @param title Character; title for the plot. Default is "ENA Plot".
#'
#' @examples
#'
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum
#' )
#'
#' plot(set) |>
#'   add_points(Condition$FirstGame, colors = "blue", with.mean = TRUE) |>
#'   add_points(Condition$SecondGame, colors = "red", with.mean = TRUE) |>
#'   with_means() |>
#'   add_nodes()
#'
#' myENAplot <- plot(set) |>
#'   add_network(Condition$FirstGame - Condition$SecondGame)
#'
#'
#' # Add a group mean to an existing ENA plot
#' add_group(myENAplot, wh = Condition$FirstGame)
#'
#' # Add a trajectory to an existing ENA plot
#' add_trajectory(myENAplot, wh = Condition$FirstGame)
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @return ena.plot.object
#' @export
plot.ena.set <- function(x, y, ..., empty = TRUE, title = "ENA Plot") {
  if (inherits(x, "ena.ordered.set")) {
    stop("Plotting of ena.ordered.set objects requires using the 'ona' package.");
  }

  p <- ena.plot(enaset = x, title = title, ...);
  if (isFALSE(empty)) {
    # Keep the object returned by add_points() instead of relying on the plot
    # having been modified in place.
    p <- add_points(p, ...);
  }

  return(p)
}


#' Add points to an ENA plot
#'
#' This function adds points to an existing ENA plot. The points to add are
#' selected through `wh`, which is captured unevaluated.
#'
#' @param x An `ENAplot` object (as returned by `plot.ena.set`).
#' @param wh Specifies the points to plot. Typically an unevaluated expression
#'   such as `Condition$FirstGame`; `NULL` (the default) plots all points.
#' @param ... Additional parameters passed to the plotting functions. A logical
#'   `mean = TRUE` additionally plots the group mean of each point set.
#' @param colors Colour(s) for the plotted points. Default is `NULL`, which
#'   takes the next unused entry of the plot's palette.
#'
#' @details
#' How `wh` is interpreted:
#' - A symbol or call (e.g. `Condition$FirstGame`) is appended to `set$points`
#'   with `$` and evaluated, i.e. `set$points$Condition$FirstGame`.
#' - A character vector of the form `c("$", column, value)` selects the rows
#'   whose `column` equals `value`; for trajectory models only the last row of
#'   each `ENA_UNIT` is plotted.
#' - A single column name plots all points, coloured by that column.
#' - Any other non-NULL value is treated as the points themselves.
#' - `NULL` plots all points.
#'
#' Every plotted point set is recorded in `plot$plotted$points`.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @return The `ENAplot` object with the points added.
#'
#' @export
add_points <- function(
  x,
  wh = NULL, ...,
  colors = NULL
) {
  plot <- x;
  if(is.null(plot)) {
    stop("No existing plot found in the ENA set. Did you call plot(set) first?")
  }
  set <- plot$enaset;
  more.args <- list(...)

  # Next unused palette colour, and the single colour used for one point set
  # (only the first user-supplied colour is used, as before).
  next_color <- plot$palette[length(plot$plotted$points) + 1];
  single_color <- if (is.null(colors)) next_color else unname(colors[1]);

  # Each branch fills three parallel structures: the point sets to plot, their
  # legend names, and the colour(s) for each set.
  wh_subbed <- substitute(wh)
  if (is.language(wh_subbed)) {
    # `set` must keep this name: the selector is evaluated as text against it.
    points <- list(eval(str2lang(paste0(c("set$points", wh_subbed), collapse = "$"))));
    named <- paste(as.character(wh_subbed)[-1], collapse = " ");
    color_sets <- list(single_color);
  }
  else if (!is.null(wh_subbed) && length(wh_subbed) > 0) {
    wh_subbed <- as.character(wh_subbed);
    if (length(wh_subbed) > 1 && wh_subbed[[2]] %in% colnames(set$points)) {
      cc <- call(wh_subbed[[1]], set$points, wh_subbed[[2]])
      part1 <- eval(cc);
      selected <- set$points[part1 == wh_subbed[[3]], ]
      if(grepl(set$model$model.type, pattern="Trajectory")) {
        # trajectory models: plot only the last row of each unit
        selected <- selected[, .SD[nrow(.SD)], by = ENA_UNIT]
      }
      points <- list(selected);
      named <- paste(wh_subbed[-1], collapse = "$");
      color_sets <- list(if (is.null(colors)) next_color else colors);
    }
    else if (length(wh_subbed) == 1 && wh_subbed[[1]] %in% colnames(set$points)) {
      # One trace holding every point, coloured per row by the column's group.
      # NOTE(review): assumes ena.plot.points accepts one colour per point.
      group_index <- as.numeric(as.factor(set$points[[wh_subbed]]));
      points <- list(set$points);
      named <- wh_subbed;
      color_sets <- list(
        if (is.null(colors)) {
          plot$palette[group_index + length(plot$plotted$points)]
        }
        else {
          colors[group_index]
        }
      );
    }
    else {
      # `wh` holds the points themselves
      points <- list(wh);
      named <- "points";
      color_sets <- list(single_color);
    }
  }
  else {
    points <- list(set$points);
    named <- "all.points";
    color_sets <- list(single_color);
  }

  # Only a logical TRUE (first element) requests the group mean.
  mean_arg <- more.args$mean;
  with_mean <- is.logical(mean_arg) && length(mean_arg) >= 1 && isTRUE(mean_arg[[1]]);

  more.args$enaplot = plot
  for(i in seq_along(points)) {
    color <- color_sets[[i]];
    name <- named[i];
    pts <- points[[i]];
    more.args$colors <- color;
    more.args$legend.name <- name;
    more.args$points <- pts;
    plot <- do.call(ena.plot.points, more.args);

    # record what was plotted so later calls can pick the next palette colour
    plot$plotted$points[[length(plot$plotted$points) + 1]] <- list(
      data = points,
      color = color
    )
    names(plot$plotted$points)[length(plot$plotted$points)] <- name;

    if(with_mean && nrow(pts) > 1) {
      more.args$labels <- name;
      plot <- do.call(ena.plot.group, more.args);
    }
  }

  return(plot);
}

#' Add all groups to an ENA plot
#'
#' This function iterates over all unique values of the first metadata column (excluding 'QEUNIT' and 'ENA_UNIT')
#' in the ENA set and adds each group as a set of points to the ENA plot. This is useful for quickly visualizing
#' all groups in a categorical variable on the same plot.
#'
#' @param x An `ENAplot` object (as returned by `plot.ena.set`).
#' @param wh (Ignored) Included for compatibility with other plotting functions.
#'
#' @details
#' The function finds the first metadata column in the ENA set (excluding 'QEUNIT' and 'ENA_UNIT'),
#' and for each unique non-missing value in that column calls `add_points()` with the selector
#' `column$value` (the same form as `add_points(plot, Condition$FirstGame)`).
#' Points previously recorded in `plot$plotted$points` are cleared first.
#'
#' @return The modified `ENAplot` object with all groups added as points.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @export
group <- function(x, wh = NULL) {
  plot <- x;
  set <- plot$enaset;

  meta_cols <- colnames(set$connection.counts)[find_meta_cols(set$connection.counts)]
  first_meta <- setdiff(meta_cols, c("QEUNIT", "ENA_UNIT"))[1]

  plot$plotted$points <- list();
  if (is.na(first_meta)) {
    # no metadata column to group by: nothing to add
    return(plot);
  }

  meta_grps <- as.character(unique(set$points[[first_meta]]));
  meta_grps <- meta_grps[!is.na(meta_grps) & nzchar(meta_grps)];
  for(grp in meta_grps) {
    # add_points() captures `wh` unevaluated, so hand it the selector
    # `<column>$<value>` as a language object rather than code that builds one.
    selector <- call("$", as.name(first_meta), as.name(grp));
    plot <- do.call(add_points, list(plot, wh = selector));
  }

  return(plot);
}

#' Add a trajectory to an ENA plot
#'
#' Adds a trajectory to an existing ENA plot. The `wh` argument is captured
#' unevaluated and decides which points are used and how they are grouped.
#'
#' @param x An `ENAplot` object (as returned by `plot.ena.set`).
#' @param wh Specifies the trajectory to plot, as an unevaluated expression.
#' @param ... Currently not used by this function.
#' @param name A character string specifying the name of the plot. Default is "plot".
#'   Currently not used by this function.
#'
#' @details
#' - `wh = NULL` (default): all points are used, grouped by `ENA_UNIT`.
#' - A multi-part expression such as `Condition$FirstGame`: only the rows whose
#'   `Condition` equals `"FirstGame"` are used, grouped by `ENA_UNIT`.
#' - A single name: all points are used, grouped by the column of that name.
#'
#' @return The `ENAplot` object with the trajectory added.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @export
add_trajectory <- function(x, wh = NULL, ..., name = "plot") {
  enaplot <- x
  ena_data <- enaplot$enaset

  wh_expr <- substitute(wh)
  wh_parts <- as.character(wh_expr)

  # Defaults: every point, one trajectory per unit.
  trajectory_points <- ena_data$points
  group_by <- "ENA_UNIT"

  if (!is.null(wh_parts) && !is.null(wh_expr)) {
    if (length(wh_parts) > 1) {
      # e.g. c("$", "Condition", "FirstGame"): extract the column via the
      # operator, then keep the rows matching the value.
      column_call <- call(wh_parts[[1]], ena_data$points, wh_parts[[2]])
      column_values <- eval(column_call)
      trajectory_points <- ena_data$points[column_values == wh_parts[[3]], ]
    }
    else {
      group_by <- wh_parts[[1]]
    }
  }

  enaplot <- ena.plot.trajectory(enaplot, points = trajectory_points, by = group_by)

  return(enaplot)
}


#' Add a group mean to an ENA plot
#'
#' Adds the mean of a group of points to an existing ENA plot. The group is
#' selected through `wh`, which is captured unevaluated.
#'
#' @param x An `ENAplot` object (as returned by `plot.ena.set`).
#' @param wh Specifies the group to plot, e.g. `Condition$FirstGame`. `NULL`
#'   (the default) delegates to `ena.plot.group` without a point selection.
#' @param ... Additional parameters passed to `ena.plot.group`. A `color`
#'   argument sets the colour; otherwise the next unused palette entry is used.
#'
#' @details
#' - If the captured `wh` is itself a call to `substitute()`, it is evaluated in
#'   the caller's frame to recover the original expression.
#' - For a selector such as `Condition$FirstGame`, the rows of the points whose
#'   `Condition` equals `"FirstGame"` are averaged column-wise and plotted.
#' - A warning is issued if the column is unknown or the group has no rows.
#'
#' An entry describing the group is appended to `plot$plotted$means` in every
#' case.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @return The `ENAplot` object with the group mean added.
#'
#' @export
add_group <- function(x, wh = NULL, ...) {
  enaplot <- x
  ena_data <- enaplot$enaset

  # Recover the selector expression. When a wrapper forwarded
  # `substitute(...)`, evaluate that in the caller to get the user's input.
  captured <- substitute(wh)
  if (is.call(captured) && deparse(captured[[1]]) == "substitute") {
    selector <- eval(captured, parent.frame())
  } else {
    selector <- captured
  }

  # Placeholder names mean the expression was not forwarded usefully; fall
  # back to the evaluated argument.
  selector_text <- as.character(selector)
  if (identical(selector_text, "wh.clean") || identical(selector_text, "y")) {
    selector <- wh
  }

  plot_args <- list(...)
  plot_args$enaplot <- enaplot
  # `$color` is deliberate: it also picks up a partially matching name.
  plot_args$colors <- if (is.null(plot_args$color)) {
    enaplot$palette[length(enaplot$plotted$means) + 1]
  } else {
    plot_args$color
  }

  member_rows <- NULL
  if (is.null(selector)) {
    enaplot <- do.call(ena.plot.group, plot_args)
    member_rows <- rep(TRUE, nrow(ena_data$points))
  } else {
    parts <- as.character(selector)

    if (!(parts[2] %in% colnames(ena_data$line.weights))) {
      warning("Unable to plot group")
    } else {
      group_label <- parts[3]
      member_rows <- ena_data$points[[parts[2]]] == parts[3]
      members <- ena_data$points[member_rows, ]
      if (nrow(members) > 0) {
        plot_args$points <- colMeans(members)
        plot_args$labels <- group_label
        enaplot <- do.call(ena.plot.group, plot_args)
      } else {
        warning("No points in the group")
      }
    }
  }

  # Record the group, even when nothing could be drawn.
  enaplot$plotted$means[[length(enaplot$plotted$means) + 1]] <- list(
    rows = member_rows,
    data = plot_args$points,
    color = plot_args$colors
  )

  return(enaplot)
}


#' Add a network to an ENA plot
#'
#' Adds a network (set of edges) to an existing ENA plot. The network can be specified in several ways, including as an unevaluated expression, a numeric matrix, or a language object. This function is typically used to visualize group means, differences between groups, or custom networks on an ENA plot.
#'
#' @param x An `ENAplot` object; its `enaset` field supplies the line weights.
#' @param wh Specifies the network to plot. Can be:
#'   \itemize{
#'     \item An unevaluated expression (e.g., `Condition$FirstGame - Condition$SecondGame`)
#'     \item A numeric matrix or data.frame of edge weights (column means are plotted)
#'     \item A numeric vector of edge weights (plotted as given)
#'     \item NULL (defaults to the mean network)
#'   }
#' @param ... Additional parameters passed to `ena.plot.network()`.
#' @param with.mean Logical. Currently not used by this function.
#' @param edge.multiplier Numeric scalar to multiply the edge weights. Useful for scaling the network visualization. Applied exactly once. Default is 1.
#' @param colors Optional vector of colors for the network. If not specified, colors are chosen from the plot palette. A difference network needs two colors; for a single network only the first color is used.
#'
#' @details
#' The function determines the type of the `wh` parameter and processes it accordingly:
#' \itemize{
#'   \item `wh` is first evaluated in the parent frame. If that succeeds and the result is numeric, a matrix or a data.frame, it is used as the network data.
#'   \item If evaluation fails, `wh` is read as a path into `set$line.weights` (e.g. `Condition$FirstGame`); a subtraction of two such paths gives the difference of the two mean networks.
#'   \item If `wh` is NULL, the mean network is plotted.
#' }
#'
#' The function updates the plot with the new network and returns the modified plot object. The ENA set is not modified in-place.
#'
#' @section Examples:
#'   See `inst/examples/example-plot-piping.R` for usage examples.
#'
#' @return The modified ENAplot object with the new network added.
#'
#' @export
add_network <- function(
  x, wh = NULL,
  ...,
  with.mean = FALSE,
  edge.multiplier = 1,
  colors = NULL
) {
  plot <- x
  set <- plot$enaset

  more_args <- list(...)

  wh_subbed <- substitute(wh)

  # Default network: overall mean. The multiplier is NOT applied here; it is
  # applied once just before plotting (previously it was applied twice when
  # `wh` was NULL).
  network <- colMeans(set$line.weights)

  # Next `n` unused palette entries, counted from the point groups plotted so far.
  next_colors <- function(n) {
    plot$palette[seq.int(length(plot$plotted$points) + 1L, length.out = n)]
  }

  # Column means of `set$line.weights$<term>`; `set` is found through this
  # helper's enclosing environment.
  group_mean <- function(term) {
    colMeans(eval(str2lang(paste0(c("set$line.weights", term), collapse = "$"))))
  }

  if (is.language(wh_subbed)) {
    network <- try(eval(wh_subbed, parent.frame()), silent = TRUE)
    if (inherits(network, "try-error")) {
      # Only a binary `a - b` call is a difference; a bare symbol has no
      # head to inspect, so guard with is.call().
      is_difference <- is.call(wh_subbed) &&
        length(wh_subbed) == 3L &&
        identical(wh_subbed[[1L]], as.name("-"))

      if (is_difference) {
        network <- group_mean(wh_subbed[[2L]]) - group_mean(wh_subbed[[3L]])
        if (is.null(colors)) {
          colors <- next_colors(2L)
        } else if (length(colors) < 2) {
          stop("Please provide two colors for the two groups being compared.")
        }
      } else {
        network <- group_mean(wh_subbed)
        colors <- if (is.null(colors)) next_colors(1L) else colors[1L]
      }
    } else if (is.matrix(network) || is.data.frame(network) || is.numeric(network)) {
      # Two-dimensional input is averaged per column; a plain numeric vector
      # already is one weight per edge and is used as given.
      if (length(dim(network)) == 2L) {
        network <- colMeans(network)
      }
      colors <- if (is.null(colors)) next_colors(1L) else colors[1L]
    }
  }

  more_args$enaplot <- plot
  more_args$colors <- colors
  if (is.data.frame(network) || is.matrix(network) || is.numeric(network)) {
    more_args$network <- network * edge.multiplier
    plot <- do.call(ena.plot.network, more_args)
  }

  return(plot)
}


#' Add nodes to an ENA plot
#'
#' Draws code nodes on an existing ENA plot through \code{ena.plot.points}
#' and records them in the plot's list of plotted networks.
#'
#' @param x An \code{ENAplot} object; its \code{enaset} field supplies the default nodes.
#' @param ... Additional arguments passed to \code{ena.plot.points}. Two of them are also read here: \code{nodes} (node table to draw instead of the set's rotation nodes) and \code{size} (node size, default 1).
#' @param return_plot Logical. Currently not used by this function.
#'
#' @details
#' The node table is converted with \code{as.matrix} for the point positions and
#' its \code{code} column supplies the text of each node.
#' After plotting, an entry holding the node table (with \code{data} and
#' \code{color} set to \code{NULL}) is appended to \code{plot$plotted$networks}.
#'
#' @return The modified plot object.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @export
add_nodes <- function(x, ..., return_plot = FALSE) {
  extras <- list(...)

  # Explicit node table wins; otherwise use the node positions of the set.
  node_table <- extras$nodes
  if (is.null(node_table)) {
    node_table <- x$enaset$rotation$nodes
  }

  marker_size <- if (is.null(extras$size)) 1 else extras$size

  drawn <- ena.plot.points(
    x,
    points = as.matrix(node_table),
    texts = as.character(node_table$code),
    point.size = marker_size,
    ...
  )

  # Remember what was drawn so later steps can inspect it.
  idx <- length(drawn$plotted$networks) + 1
  drawn$plotted$networks[[idx]] <- list(
    nodes = node_table,
    data = NULL,
    color = NULL
  )

  drawn
}

#' Adds group means to the ENA plot.
#'
#' For every point group recorded in \code{x$plotted$points}, the first
#' element of its \code{data} is passed to \code{ena.plot.group} together with
#' the group's first colour, and the column means of that data are recorded in
#' \code{plotted$means}.
#'
#' @param x A plot object holding previously plotted point groups in
#'   \code{x$plotted$points}.
#'
#' @return The modified plot object.
#'
#' @export
with_means <- function(x) {
  result <- x

  # Iterate over the groups as they were when the function was called.
  for (grp in x$plotted$points) {
    coords <- grp$data[[1]]
    tint <- grp$color[1]

    result <- ena.plot.group(result, coords, colors = tint)

    slot_no <- length(result$plotted$means) + 1
    result$plotted$means[[slot_no]] <- list(
      data = colMeans(coords),
      color = tint
    )
  }

  result
}


#' Adds trajectories to an ENA plot.
#'
#' Builds an animated plotly scatter plot from the point groups of the most
#' recent plot in the set, with one animation frame per value of `by`, and
#' appends it to the set's list of plots.
#' It supports options for jittering, animation, and scaling.
#'
#' @param x An ENA set object containing the plots. Its model type must contain "Trajectory".
#' @param ... Optional named settings read by this function: `size` (marker size, default 10), `opacity` (marker opacity, default 1) and `scale` (axis range; by default symmetric around zero, based on the largest absolute coordinate).
#' @param by A character vector specifying the grouping variables for the trajectories. Default is the first conversation parameter in the ENA set.
#' @param add_jitter Logical; if `TRUE`, adds jitter to the trajectory points. Default is `TRUE`.
#' @param frame Numeric; the duration of each frame in the animation. Default is 1100.
#' @param transition Numeric; the duration of the transition between frames. Default is 1000.
#' @param easing A character string specifying the easing function for the animation. Default is "circle-in-out".
#'
#' @return Invisibly returns the ENA set with the new plotly plot appended to `plots`.
#'
#' @export
with_trajectory <- function(
  x, ...,
  by = x$`_function.params`$conversation[1],
  add_jitter = TRUE,
  frame = 1100,
  transition = 1000,
  easing = "circle-in-out"
) {
  set <- x
  # Refuse to run unless the model type mentions "Trajectory".
  if(!grepl(x = set$model$model.type, pattern = "Trajectory")) {
    stop(paste0("Unable to plot trajectories on model of type: ", set$model$model.type))
  }
  # Work from the most recently added plot in the set.
  plot <- set$plots[[length(set$plots)]]

  args = list(...)

  # Table of step values: a leading zero row followed by the sorted unique
  # values of each `by` column found in the set's points.
  all_steps_w_zero <- data.table(rbind(
    rep(0, length(by)),
    expand.grid(
      sapply(by, function(b) sort(unique(set$points[[b]]))),
      stringsAsFactors = F
    )
  ))
  colnames(all_steps_w_zero) <- by
  # One prepared trajectory table per plotted point group; the group's data is
  # passed both as the points and as the units.
  point_group_names <- seq(plot$plotted$points)
  points_cleaned <- lapply(point_group_names, function(n) {
    prepare_trajectory_data(
      points = plot$plotted$points[[n]]$data,
      by = by,
      units = plot$plotted$points[[n]]$data,
      units_by = set$`_function.params`$units,
      steps = all_steps_w_zero
    )
  })
  # Stack the groups, keeping each group's colour in a `color` id column.
  names(points_cleaned) <- sapply(plot$plotted$points, "[[", "color")
  points_cleaned <- rbindlist(points_cleaned, idcol = "color")

  # Keyed join on ENA_UNIT: attach the set's metadata to every trajectory row.
  meta_data = unique(set$meta.data)
  setkey(points_cleaned, ENA_UNIT)
  setkey(meta_data, ENA_UNIT)
  points_cleaned = meta_data[points_cleaned]
  setkeyv(points_cleaned, by)

  size = ifelse(is.null(args$size), 10, args$size)
  opacity = ifelse(is.null(args$opacity), 1, args$opacity)

  # First two columns selected by find_dimension_cols() (defined elsewhere;
  # presumably it flags the dimension columns) become the x/y coordinates.
  dims = as.matrix(points_cleaned[, find_dimension_cols(points_cleaned), with = F])[, 1:2]
  if(add_jitter) {
    dims[, 1] = jitter(dims[, 1])
    dims[, 2] = jitter(dims[, 2])
  }

  # Without an explicit `scale`, use a range symmetric around zero.
  if(is.null(args$scale)) {
    max_abs = max(abs(dims))
    scale = c(-1*max_abs, max_abs)
  }
  else {
    scale = args$scale
  }

  # Shared axis settings for both x and y.
  ax <- list(
    range = scale, title = "",
    zeroline = TRUE, showline = FALSE,
    showticklabels = FALSE, showgrid = FALSE
  )

  #####
  ### Add to the plot
  #####
    # NOTE(review): the frame formula is built with paste0("~", by), so `by`
    # is presumably expected to be a single column name here -- confirm.
    thisPlot <- plotly::plot_ly(
        data = points_cleaned,
        x = dims[,1], y = dims[,2],
        text = ~ENA_UNIT,
        frame = as.formula(paste0("~", by)),
        type = 'scatter',
        mode = 'markers',
        marker = list(
          size = size,
          opacity = opacity,
          hoverinfo = "text",
          color = as.numeric(as.factor(points_cleaned[["color"]]))
        )
      ) |>
      plotly::layout(
        xaxis = ax,
        yaxis = ax,
        showlegend = T
      ) |>
      plotly::animation_opts(
        frame = frame,
        transition = transition,
        easing = easing,
        redraw = T
      )
  #####

  # The animated plot is appended as a new entry; existing plots are kept.
  # set$model$plot <- plot
  set$plots[[length(set$plots) + 1]] <- thisPlot
  invisible(set)
}

#' Prepares trajectory data for an ENA plot.
#'
#' This function processes and prepares trajectory data for plotting in an ENA set. It handles rotation, grouping, and filling missing steps in the trajectory.
#'
#' @param x An ENA set object. If `NULL`, other parameters must be provided.
#' @param by A character vector specifying the grouping variables for the trajectory. Default is the first conversation parameter in the ENA set.
#' @param rotation_matrix A matrix used to rotate the points. Default is the rotation matrix from the ENA set.
#' @param points A data table of points to be processed. Default is the points from the ENA set.
#' @param units A data table of units corresponding to the points. Default is the trajectories or points from the ENA set.
#' @param units_by A character vector specifying the unit grouping variables. Default is the unit parameters from the ENA set.
#' @param steps A data table specifying the steps for the trajectory. If `NULL`, steps are generated automatically.
#'
#' @return A data table containing the processed trajectory data, including dimensions and metadata.
prepare_trajectory_data <- function(
  x = NULL,
  by = x$`_function.params`$conversation[1],
  rotation_matrix = x$rotation.matrix,
  points = NULL,
  units = points,
  units_by = x$`_function.params`$units,
  steps = NULL
) {
  # When an ena.set is supplied, fill in missing points/units from it.
  # NOTE(review): `units` defaults lazily to `points`, which has already been
  # filled in by the time `is.null(units)` is evaluated, so the trajectories
  # branch only runs when `units = NULL` is passed explicitly -- confirm intent.
  if(is(x, "ena.set")) {
    if(is.null(points))
      points <- x$points
    if(is.null(units))
      units <- x$trajectories #points[, find_meta_cols(points), with = FALSE]
  }

  # Distinct combinations of the unit columns plus ENA_UNIT.
  unique_unit_values <- unique(units[, c(units_by, "ENA_UNIT"), with = FALSE])

  # Bind the unit columns to the (optionally rotated) point coordinates.
  if(!is.null(rotation_matrix)) {
    rotation_matrix = as.matrix(rotation_matrix)
    full_data <- cbind(units, as.matrix(points) %*% rotation_matrix)
  } else {
    full_data <- cbind(units, as.matrix(points))
  }
  # Keep only the first occurrence of each column name.
  full_data <- full_data[, unique(names(full_data)), with = FALSE]

  # Step table: either supplied by the caller, or a zero row followed by the
  # sorted unique values of each `by` column in `units`.
  if(is.null(steps)) {
    all_steps_w_zero <- data.table(rbind(
      rep(0, length(by)),
      expand.grid(
        sapply(by, function(b) sort(unique(units[[b]]))),
        stringsAsFactors = F
      )
    ))
    colnames(all_steps_w_zero) <- by
  } else {
    all_steps_w_zero <- steps
  }
  # Cross join: every step value paired with every unit.
  all_step_data <- CJ(all_steps_w_zero[[by]], unique_unit_values$ENA_UNIT)
  colnames(all_step_data) <- c(by, "ENA_UNIT")

  # Columns of `points` carrying the "ena.dimension" class.
  dimension_col_names = colnames(points)[
                          which(sapply(points, function(col) {
                            is(col, "ena.dimension")
                          }))
                        ]
  # Start every (step, unit) row at zero on all dimensions, then attach the
  # unit columns.
  all_step_data[, c(dimension_col_names) := 0]
  all_step_data[[by]] = as.ena.metadata(all_step_data[[by]])
  all_step_data = merge(unique_unit_values, all_step_data, by = "ENA_UNIT")
  setkey(all_step_data, "ENA_UNIT")

  # For each (ENA_UNIT, step) group, pick the coordinates to report:
  #   - the matching row of full_data, if one exists;
  #   - otherwise the unit's last row with a smaller step value;
  #   - otherwise a row of zeros.
  filled_data = all_step_data[ , {
      by_names = names(.BY)
      # One logical column per grouping variable: does full_data match .BY?
      user_rows = sapply(1:length(by_names), function(n) {
          full_data[[by_names[n]]] == .BY[n]
      })
      # NOTE(review): the hard-coded 2 presumably equals length(by_names),
      # i.e. ENA_UNIT plus a single `by` column -- confirm.
      existing_row = which(rowSums(user_rows * 1) == 2)
      if(length(existing_row) > 0) {
        full_data[existing_row, c(dimension_col_names), with = FALSE]
      } else {
        prev_row = tail(full_data[ENA_UNIT == .BY$ENA_UNIT & full_data[[by]] < .BY[[by]],], 1)
        if(nrow(prev_row) == 0) {
          data.table(matrix(rep(0, length(dimension_col_names)), nrow = 1, dimnames = list(NULL, c(dimension_col_names))))
        } else {
          prev_row[, c(dimension_col_names), with = FALSE]
        }
      }

  },  by = c("ENA_UNIT", by)]
  # Restore the dimension class on the coordinate columns (set by reference).
  for(col in dimension_col_names) {
    set(filled_data, j = col, value = as.ena.dimension(filled_data[[col]]))
  }
  return(filled_data)
}

#' Clears specified plots from an ENA set.
#'
#' This function removes the plots specified by their indices from the `plots` field of the ENA set.
#'
#' @param x An ENA set object containing the plots.
#' @param wh A vector specifying the indices of the plots to clear. Default is all plots.
#'
#' @return Invisibly returns the modified ENA set object with the specified plots removed.
#'
#' @example inst/examples/example-plot-piping.R
#'
#' @export
clear <- function(x, wh = seq_along(x$plots)) {
  if (length(wh) > 0) {
    # Single-bracket assignment removes every listed element. `[[wh]] <- NULL`
    # would treat a longer `wh` as a recursive index into nested lists.
    x$plots[wh] <- NULL
  }
  invisible(x)
}

#' Scales the points in an ENA set.
#'
#' This function adjusts the scale of the points in the ENA set to match the range of the network.
#'
#' @param x An ENAplot object containing the set to scale.
#' @param center Unused parameter, included for compatibility.
#' @param scale A numeric value specifying the scaling factor. If `NULL`, the function will determine the scale based on the data.
#'
#' @return The modified ENAplot object with scaled points.
#'
#' @export
scale.ENAplot <- function(x, center = NULL, scale = NULL) {
  plot <- x
  set <- plot$enaset

  # Derive the factor from the data only when the caller did not supply one:
  # the smaller of the two ratios between node range and point range.
  if (is.null(scale)) {
    point_range <- range(set$points)
    network_range <- range(set$rotation$nodes)
    scale <- min(abs(network_range) / abs(point_range))
  }

  # Write the scaled points back into the plot. Assigning to the local `set`
  # alone would be lost on return when the set is a plain list.
  plot$enaset$points <- set$points * scale

  return(plot)
}

#' Updates the axis ranges of an ENA plot based on the plotted data.
#'
#' This function adjusts the x and y axis ranges of the ENA plot to ensure that all plotted points, networks, and means are visible.
#'
#' @param x An ENA plot object containing the plotted data and axis configurations.
#'
#' @return The updated ENA plot object with adjusted axis ranges.
#'
#' @export
check_range <- function(x) {
  # Largest absolute coordinate of each plotted entry. The axes are symmetric
  # around zero, so negative extremes count as much as positive ones. Entries
  # recorded without data are skipped.
  extent <- function(entries) {
    as.numeric(unlist(lapply(entries, function(p) {
      if (is.null(p$data)) NULL else max(abs(as.matrix(p$data)))
    })))
  }

  numbers <- extent(x$plotted$points)
  means <- extent(x$plotted$means)

  network <- NULL
  if (length(x$plotted$networks) > 0) {
    network <- abs(as.numeric(sapply(x$plotted$networks, function(nn) sapply(nn, `[`, c("x0","x1","y0","y1")))))
  }

  # Nothing plotted that could drive the range: leave the axes alone.
  if (length(numbers) == 0 && length(means) == 0) {
    return(x)
  }

  curr_max <- max(c(numbers, network, means))
  axis_max <- max(x$axes$y$range)

  # Grow the axes when the data would come within 20% of the edge, and shrink
  # them when the data fills less than half of the current range.
  if (curr_max * 1.2 > axis_max || curr_max < axis_max * 0.5) {
    this.max <- curr_max * 1.2
    x$axes$x$range <- c(-this.max, this.max)
    x$axes$y$range <- c(-this.max, this.max)
    x$plot <- plotly::layout(
      x$plot,
      xaxis = x$axes$x,
      yaxis = x$axes$y
    )
  }

  x
}

#' Display and update plot objects within a custom object
#'
#' This function updates the plots within the provided object by applying the `check_range` function to each plot.
#' It then prints the updated object using custom print options and returns the object invisibly.
#'
#' @param x An object containing a list of plots in the `plots` field.
#' @param ... Additional arguments passed to the `print` method.
#'
#' @return The updated object `x`, returned invisibly.
#'
#' @export
show <- function(x, ...) {
  # Bring every stored plot's axis ranges up to date before printing.
  ranged_plots <- lapply(x$plots, check_range)
  x$plots <- ranged_plots

  # Print the plots only, not the set itself.
  print(x, ..., plot = TRUE, set = FALSE)

  invisible(x)
}

## ── qeviz interactive plot integration ───────────────────────────────────────
##
## Public API
##   ena.plot.interactive()   — create an interactive qeviz htmlwidget
##   ena.export.html()        — write a self-contained HTML file
##   enaInteractiveOutput()   — Shiny output binding
##   renderEnaInteractive()   — Shiny render function
##
## Internal helpers
##   .ena_to_model_data()  — convert ena.set to qeviz ModelData list
##   .ena_frame()          — build a QEFrame list from a data.frame
##   .ena_group_ci()       — 95% t-interval bounds per group
##   .ena_group_outlier()  — IQR-based outlier bounds per group
## ─────────────────────────────────────────────────────────────────────────────


# ── Internal helpers ──────────────────────────────────────────────────────────

#' Build a QEFrame list from a plain data.frame.
#' @noRd
.ena_frame <- function(df) {
  # Type label for one column. Integer is tested before numeric because
  # is.numeric() is also TRUE for integer vectors; tested the other way round
  # the "integer" label could never be produced.
  column_type <- function(col) {
    if (is.integer(col)) {
      "integer"
    } else if (is.numeric(col)) {
      "numeric"
    } else {
      "character"
    }
  }

  list(
    # One named list per row.
    data  = lapply(seq_len(nrow(df)), function(i) as.list(df[i, , drop = FALSE])),
    # Named list mapping each column to its type label.
    types = as.list(setNames(
      vapply(df, column_type, character(1)),
      names(df)
    ))
  )
}

#' Compute per-group 95% CI bounding boxes (t-interval on group mean).
#' Returns a data.frame with columns: group, {dim}.low, {dim}.high for each dim.
#' @noRd
.ena_group_ci <- function(points_df, group_col, dim_cols, conf_level = 0.95) {
  # Column order of the result: low/high interleaved per dimension.
  bound_names <- as.vector(
    rbind(paste0(dim_cols, ".low"), paste0(dim_cols, ".high"))
  )

  rows <- lapply(unique(points_df[[group_col]]), function(g) {
    # %in% never yields NA, so rows with a missing group value are not
    # dragged into other groups as all-NA rows.
    sub <- points_df[points_df[[group_col]] %in% g, dim_cols, drop = FALSE]
    n   <- nrow(sub)
    # A t-interval needs at least two observations.
    if (n < 2L) return(NULL)

    means <- colMeans(sub, na.rm = TRUE)
    sds   <- vapply(sub, sd, numeric(1), na.rm = TRUE)
    half  <- qt((1 + conf_level) / 2, df = n - 1L) * sds / sqrt(n)
    bounds <- as.numeric(rbind(means - half, means + half))

    # Build the row as a list so the bounds stay numeric; c(group = g, ...)
    # would coerce every bound to a character string.
    as.data.frame(
      c(list(group = g), as.list(setNames(bounds, bound_names))),
      stringsAsFactors = FALSE
    )
  })

  rows <- Filter(Negate(is.null), rows)
  if (length(rows) == 0L) return(NULL)
  do.call(rbind, rows)
}

#' Compute per-group IQR-based outlier bounding boxes.
#' Returns a data.frame with columns: group, {dim}.low, {dim}.high for each dim.
#' @noRd
.ena_group_outlier <- function(points_df, group_col, dim_cols, iqr_factor = 1.5) {
  # One-row data.frame of lower/upper fences for a single group, or NULL when
  # the group has no rows.
  fences_for <- function(g) {
    members <- points_df[points_df[[group_col]] == g, dim_cols, drop = FALSE]
    if (nrow(members) < 1L) {
      return(NULL)
    }

    fences <- list(group = g)
    for (d in dim_cols) {
      # First and third quartile in one call; as.numeric() drops the names.
      quartiles <- as.numeric(quantile(members[[d]], c(0.25, 0.75), na.rm = TRUE))
      spread <- quartiles[2L] - quartiles[1L]
      fences[[paste0(d, ".low")]]  <- quartiles[1L] - iqr_factor * spread
      fences[[paste0(d, ".high")]] <- quartiles[2L] + iqr_factor * spread
    }
    as.data.frame(fences, stringsAsFactors = FALSE)
  }

  per_group <- lapply(unique(points_df[[group_col]]), fences_for)
  per_group <- Filter(Negate(is.null), per_group)
  if (length(per_group) == 0L) {
    return(NULL)
  }
  do.call(rbind, per_group)
}

#' Convert an ena.set to the ModelData list expected by qeviz.
#' @noRd
.ena_to_model_data <- function(set,
                                group_col  = NULL,
                                dim_cols   = c("SVD1", "SVD2"),
                                include_ci = TRUE,
                                conf_level = 0.95,
                                iqr_factor = 1.5) {

  # ── nodes ──────────────────────────────────────────────────────────────────
  # Node code plus its position on the requested dimensions.
  node_pos <- as.data.frame(set$rotation$nodes)[, c("code", dim_cols), drop = FALSE]
  nodes    <- .ena_frame(node_pos)

  # ── edges ──────────────────────────────────────────────────────────────────
  # connection.counts has metadata columns (ena.metadata class) followed by
  # edge-weight columns (ena.co.occurrence class).  Edge column names use the
  # rENA " & " separator; qeviz expects "." — rename them here.
  cc       <- as.data.frame(set$connection.counts)
  is_edge  <- vapply(cc, inherits, logical(1), what = "ena.co.occurrence")
  edge_cc  <- cc[, is_edge, drop = FALSE]
  names(edge_cc) <- gsub(" & ", ".", names(edge_cc), fixed = TRUE)
  # The unit id goes first, followed by the edge columns.
  edge_cc$QEUNIT <- as.character(cc$ENA_UNIT)
  edge_cc  <- edge_cc[, c("QEUNIT", setdiff(names(edge_cc), "QEUNIT")), drop = FALSE]
  edges    <- .ena_frame(edge_cc)

  # ── points ─────────────────────────────────────────────────────────────────
  # Keep the unit id, the optional group column and the dimensions; columns
  # that are absent from the points table are silently dropped.
  pts <- as.data.frame(set$points)
  keep_cols <- c("ENA_UNIT", group_col, dim_cols)
  pts       <- pts[, keep_cols[keep_cols %in% names(pts)], drop = FALSE]
  names(pts)[names(pts) == "ENA_UNIT"] <- "QEUNIT"
  if (!is.null(group_col) && group_col %in% names(pts)) {
    pts[[group_col]] <- as.character(pts[[group_col]])
  }
  for (d in dim_cols) {
    if (d %in% names(pts)) pts[[d]] <- as.numeric(pts[[d]])
  }
  points <- .ena_frame(pts)

  result <- list(
    nodes       = nodes,
    edges       = edges,
    points      = points,
    # Timestamp in milliseconds.
    updated     = as.numeric(Sys.time()) * 1000,
    id_col      = "QEUNIT",
    node_id_col = "code",
    x_col       = dim_cols[1L],
    y_col       = dim_cols[2L],
    group_col   = group_col
  )

  # ── groups frame — pre-computed means + optional CI bounds ────────────────
  if (!is.null(group_col) && group_col %in% names(pts)) {
    groups_unique <- unique(pts[[group_col]])

    means_rows <- lapply(groups_unique, function(g) {
      # %in% never yields NA, so rows with a missing group value are not
      # dragged into other groups as all-NA rows.
      sub   <- pts[pts[[group_col]] %in% g, dim_cols, drop = FALSE]
      means <- colMeans(sub, na.rm = TRUE)
      # Build the row as a list so the means stay numeric; c(group = g, ...)
      # would coerce them to character strings.
      as.data.frame(
        c(list(group = g), as.list(setNames(as.numeric(means), dim_cols))),
        stringsAsFactors = FALSE
      )
    })
    groups_df <- do.call(rbind, Filter(Negate(is.null), means_rows))

    if (include_ci) {
      ci_df <- .ena_group_ci(pts, group_col, dim_cols, conf_level)
      if (!is.null(ci_df)) {
        groups_df <- merge(groups_df, ci_df, by = "group", all.x = TRUE)
        # merge() sorts by key; restore the order in which groups first appear.
        groups_df <- groups_df[match(groups_unique, groups_df$group), , drop = FALSE]
      }
    }

    result$groups <- .ena_frame(groups_df)

    # Deprecated: outlier frame retained for backward compat
    out_df <- .ena_group_outlier(pts, group_col, dim_cols, iqr_factor)
    if (!is.null(out_df)) result$outlier <- .ena_frame(out_df)
  }

  result
}


# ── Public API ────────────────────────────────────────────────────────────────

#' Interactive ENA plot using qeviz
#'
#' Renders an interactive ENA plot inside RStudio, R Markdown / Quarto, and
#' Shiny using the qeviz visualization library.
#'
#' @param set         An \code{\link{ena.make.set}} result.
#' @param group_col   Character. Name of the grouping column in \code{set$points}
#'                    (e.g. \code{"Condition"}).  Controls point colours and group
#'                    mean networks.
#' @param group       Character. Which group's mean network to display.  Defaults
#'                    to the first group.
#' @param unit        Character. A specific unit ID to display its individual
#'                    network instead of a group mean.
#' @param compare     Character. Second group or unit for a subtraction view
#'                    (\code{group} minus \code{compare}).
#' @param also        Character. Second group for an overlay view (both networks
#'                    drawn simultaneously).
#' @param dim_cols    Character vector of two dimension names to plot.
#'                    Default \code{c("SVD1", "SVD2")}.
#' @param label_nodes \code{"on"} | \code{"off"} | \code{"auto"} | \code{"click"}.
#'                    Visibility of code-node labels.  Default \code{"on"}.
#' @param label_means Visibility of group-mean labels.  Default \code{"on"}.
#' @param label_points Visibility of unit-point labels.  Default \code{"off"}.
#' @param confidence  Logical. Include 95\% CI bounds in the groups frame.
#'                    Default \code{TRUE}.
#' @param outlier     Logical. Draw IQR-based outlier boxes. Default \code{TRUE}.
#' @param scale_points Logical. Rescale unit points to match the node coordinate
#'                    space.  Default \code{TRUE}.
#' @param conf_level  Numeric. Confidence level for CI boxes. Default \code{0.95}.
#' @param iqr_factor  Numeric. IQR multiplier for outlier boxes. Default \code{1.5}.
#' @param width,height Widget dimensions in pixels.  \code{NULL} uses htmlwidgets
#'                    sizing policy defaults (700 × 650).
#'
#' @return An \code{htmlwidget} object that renders in RStudio Viewer, R Markdown,
#'   Quarto, and Shiny.
#'
#' @examples
#' \dontrun{
#' data(RS.data)
#' codeNames <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'                "Client.and.Consultant.Requests", "Design.Reasoning", "Collaboration")
#' accum <- ena.accumulate.data(
#'   units        = RS.data[, c("UserName", "Condition")],
#'   conversation = RS.data[, c("Condition", "GroupName")],
#'   codes        = RS.data[, codeNames],
#'   window.size.back = 4
#' )
#' set <- ena.make.set(enadata = accum)
#'
#' # Basic plot coloured by Condition
#' ena.plot.interactive(set, group_col = "Condition")
#'
#' # Show only FirstGame mean network
#' ena.plot.interactive(set, group_col = "Condition", group = "FirstGame")
#'
#' # Subtraction: FirstGame minus SecondGame
#' ena.plot.interactive(set, group_col = "Condition",
#'                      group = "FirstGame", compare = "SecondGame")
#' }
#'
#' @export
ena.plot.interactive <- function(
  set,
  group_col     = NULL,
  group         = NULL,
  unit          = NULL,
  compare       = NULL,
  also          = NULL,
  dim_cols      = c("SVD1", "SVD2"),
  label_nodes   = "on",
  label_means   = "on",
  label_points  = "off",
  confidence    = TRUE,
  outlier       = TRUE,
  scale_points  = TRUE,
  conf_level    = 0.95,
  iqr_factor    = 1.5,
  width         = NULL,
  height        = NULL
) {
  # htmlwidgets is an optional dependency: fail early with install advice.
  widgets_available <- requireNamespace("htmlwidgets", quietly = TRUE)
  if (!widgets_available) {
    stop("The 'htmlwidgets' package is required. Install with: install.packages('htmlwidgets')")
  }

  # Convert the set into the list structure handed to the widget.
  model_data <- .ena_to_model_data(
    set,
    group_col  = group_col,
    dim_cols   = dim_cols,
    include_ci = isTRUE(confidence),
    conf_level = conf_level,
    iqr_factor = iqr_factor
  )

  # A switch is sent as the string "false" only when explicitly FALSE;
  # otherwise the option is left as NULL.
  off_flag <- function(setting) {
    if (isFALSE(setting)) "false" else NULL
  }

  widget_options <- list(
    group       = group,
    unit        = unit,
    compare     = compare,
    also        = also,
    labelNodes  = label_nodes,
    labelMeans  = label_means,
    labelPoints = label_points,
    outlier     = off_flag(outlier),
    scalePoints = off_flag(scale_points)
  )

  payload <- list(model = model_data, options = widget_options)

  sizing <- htmlwidgets::sizingPolicy(
    viewer.padding      = 5,
    browser.fill        = TRUE,
    knitr.figure        = FALSE,
    knitr.defaultWidth  = 700,
    knitr.defaultHeight = 650
  )

  htmlwidgets::createWidget(
    name         = "qeviz",
    x            = payload,
    width        = width,
    height       = height,
    package      = "rENA",
    sizingPolicy = sizing
  )
}


#' Export a self-contained interactive ENA plot as HTML
#'
#' Writes a single \code{.html} file containing the qeviz bundle and embedded
#' model data.  No R, no Python, and no server are required to open the file —
#' share it with collaborators, attach it to a paper submission, or archive it
#' as supplementary material.
#'
#' @param set       An \code{\link{ena.make.set}} result.
#' @param file      Output file path.  Default \code{"ena_plot.html"}.
#' @param group_col Character. Grouping column in \code{set$points}.
#' @param ...       Additional arguments passed to \code{\link{ena.plot.interactive}}
#'                  (e.g. \code{group}, \code{compare}, \code{label_nodes}).
#' @param width,height Plot dimensions in pixels. Default 700 × 600.
#' @param selfcontained Logical. Inline the qeviz bundle in the HTML file.
#'                    Default \code{TRUE}.  Set to \code{FALSE} to reference the
#'                    bundle via a relative path (smaller file, not portable).
#'
#' @return The resolved absolute path of the written file (invisibly).
#'
#' @examples
#' \dontrun{
#' set <- ena.make.set(enadata = accum)
#' ena.export.html(set, "model.html", group_col = "Condition")
#' }
#'
#' @export
ena.export.html <- function(
  set,
  file          = "ena_plot.html",
  group_col     = NULL,
  ...,
  width         = 700L,
  height        = 600L,
  selfcontained = TRUE
) {
  # htmlwidgets is an optional dependency: fail early with install advice.
  if (!requireNamespace("htmlwidgets", quietly = TRUE)) {
    stop("The 'htmlwidgets' package is required. Install with: install.packages('htmlwidgets')")
  }

  widget <- ena.plot.interactive(
    set,
    group_col = group_col,
    width     = width,
    height    = height,
    ...
  )

  # Resolve the directory rather than the file itself: the output file usually
  # does not exist yet, and normalizePath() on a non-existent path may return
  # its input unchanged, which would leave a relative path.
  out_dir  <- normalizePath(dirname(file), mustWork = FALSE)
  abs_file <- file.path(out_dir, basename(file))

  htmlwidgets::saveWidget(widget, abs_file, selfcontained = selfcontained)
  message("Written: ", abs_file)
  invisible(abs_file)
}


#' Shiny output binding for interactive ENA plots
#'
#' @param outputId Shiny output ID.
#' @param width,height CSS dimensions. Defaults: \code{"100\%"}, \code{"600px"}.
#' @export
enaInteractiveOutput <- function(outputId, width = "100%", height = "600px") {
  # Output placeholder for the "qeviz" widget shipped with the rENA package.
  htmlwidgets::shinyWidgetOutput(
    outputId = outputId,
    name     = "qeviz",
    width    = width,
    height   = height,
    package  = "rENA"
  )
}


#' Shiny render function for interactive ENA plots
#'
#' @param expr    Expression that returns an \code{\link{ena.plot.interactive}} widget.
#' @param env     Environment for \code{expr}. Default: \code{parent.frame()}.
#' @param quoted  Logical. Is \code{expr} already quoted? Default \code{FALSE}.
#' @export
renderEnaInteractive <- function(expr, env = parent.frame(), quoted = FALSE) {
  # Capture the caller's code unevaluated unless it was already quoted.
  if (!quoted) {
    expr <- substitute(expr)
  }
  # From here on the expression is always passed along as quoted.
  htmlwidgets::shinyRenderWidget(
    expr           = expr,
    outputFunction = enaInteractiveOutput,
    env            = env,
    quoted         = TRUE
  )
}

##
#' @title Generate ENA Set
#'
#' @description Generates an ENA model by constructing a dimensional reduction of adjacency (co-occurrence) vectors in an ENA data object
#'
#' @details This function generates an ENAset object from an ENAdata object. Takes
#' the adjacency (co-occurrence) vectors from enadata, computes a dimensional
#' reduction (projection), and calculates node positions in the projected ENA
#' space. Returns location of the units in the projected space, as well as
#' locations for node positions, and normalized adjacency (co-occurrence) vectors
#' to construct network graphs
#'
#' @export
#'
#' @param enadata \code{\link{ENAdata}} that will be used to generate an ENA model
#' @param dimensions The number of dimensions to include in the dimensional reduction
#' @param norm.by A function to be used to normalize adjacency (co-occurrence) vectors before computing the dimensional reduction, default: fun_sphere_norm()
#' @param rotation.by	A function to be used to compute the dimensional reduction, default: ena.svd()
#' @param rotation.params (optional) A character vector containing additional parameters for the function in rotation.by, if needed
#' @param rotation.set A previously-constructed  ENARotationSet object to use for the dimensional reduction
#' @param endpoints.only A logical variable which determines whether to only show endpoints for trajectory models
#' @param center.align.to.origin A logical variable when TRUE (default) determines aligns both point center and centroid center to the origin
#' @param node.position.method A function to be used to determine node positions based on the dimensional reduction, default: lws.positions.sq()
#' @param as.list R6 objects will be deprecated, but if this is TRUE, the original R6 object will be returned, otherwise a list with class `ena.set`
#' @param ... additional parameters addressed in inner function
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum
#' )
#'
#' set.means.rotated = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'       accum$meta.data$Condition=="FirstGame",
#'       accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' @seealso \code{\link{ena.accumulate.data}}, \code{\link{ENAset}}
#'
#' @return \code{\link{ENAset}} class object that can be further processed for analysis or plotting
##
ena.make.set <- function(
  enadata,
  dimensions = 2,
  norm.by = fun_sphere_norm,
  rotation.by = ena.svd,
  rotation.params = NULL,
  rotation.set = NULL,
  endpoints.only = TRUE,
  center.align.to.origin = TRUE,
  node.position.method = lws.positions.sq,
  as.list = TRUE,
  ...
) {
  ###
  # Legacy R6 path (as.list = FALSE): delegate to the ENAset R6 class and
  # return early.
  #####
  if (isFALSE(as.list)) {
    warning(paste0("Usage of ENAdata and ENAset objects will be deprecated ",
                   "and potentially removed altogether in future versions."))

    if (!is(enadata, "ENAdata")) {
      stop(paste0("Use of ena.make.set with as.list=FALSE requires `enadata` ",
                  "be an ENAdata object. Re-run the accumulation with as.list=FALSE"))
    }

    # Plain if/else instead of ifelse(): ifelse() is meant for vectors, only
    # handles function objects through a special case, and fails outright
    # when `rotation.by` is NULL.
    r6.rotation.by <- if (identical(rotation.by, ena.svd)) {
      ena.svd.R6
    } else {
      rotation.by
    }
    r6.node.position.method <- if (identical(node.position.method, lws.positions.sq)) {
      lws.positions.sq.R6
    } else {
      node.position.method
    }

    r6.set <- ENAset$new(
      enadata = enadata,
      dimensions = dimensions,
      rotation.by = r6.rotation.by,
      rotation.params = rotation.params,
      rotation.set = rotation.set,
      norm.by = norm.by,
      node.position.method = r6.node.position.method,
      endpoints.only = endpoints.only,
      center.align.to.origin = center.align.to.origin,
      ...
    )
    return(r6.set$process())
  }

  ###
  # List-based path: convert legacy R6 input first
  #####
  if (inherits(enadata, "ENAdata")) {
    warning(paste0("Usage of ENAdata objects will be deprecated and ",
                   "potentially removed altogether in future versions. See ",
                   "ena.accumulate.data() or ena.set()."))

    enadata <- ena.set(enadata)
  }

  enadata$`_function.params`$center.align.to.origin <- center.align.to.origin
  enadata$`_function.params`$rotation.by <- rotation.by
  enadata$`_function.params`$rotation.params <- rotation.params

  ###
  # Convert the string vector of code names to their corresponding
  # co-occurrence names
  #####
  code_columns <- svector_to_ut(enadata$rotation$codes)

  ###
  # Normalize the raw connection counts using `norm.by`
  # (default: fun_sphere_norm)
  #####
  line.weights <- norm.by(as.matrix(enadata$connection.counts))
  colnames(line.weights) <- code_columns

  line.weights.dt <- data.table::as.data.table(line.weights)
  for (i in seq_len(ncol(line.weights.dt))) {
    set(line.weights.dt, j = i,
        value = as.ena.co.occurrence(line.weights.dt[[i]]))
  }

  enadata$line.weights <- cbind(enadata$meta.data, line.weights.dt)
  class(enadata$line.weights) <- c("ena.line.weights", "ena.matrix",
                                   class(enadata$line.weights))
  #####

  ###
  # Center the normed data
  #####
  # Validate a supplied rotation set once, up front; every later use of
  # `rotation.set` can then assume it is either NULL or valid.
  if (!is.null(rotation.set) && !inherits(rotation.set, "ena.rotation.set")) {
    stop("Supplied rotation.set is not an instance of ENARotationSet")
  }

  # Center against the supplied rotation set when there is one, otherwise
  # against the data itself.
  center_weights <- function(lws) {
    if (is.null(rotation.set)) {
      center_data_c(lws)
    } else {
      center.projection(lws = lws, rotation = rotation.set)
    }
  }

  non_zero_rows <- NULL
  if (center.align.to.origin) {
    # Only units with at least one co-occurrence are centered; all-zero units
    # stay at the origin. The mask is computed once and reused below.
    non_zero_rows <- rowSums(as.matrix(line.weights)) != 0
    if (!(sum(non_zero_rows) > 0)) {
      stop("There were no co-occurrences of codes for any of the units within the model as defined.")
    }

    points.for.projection <- line.weights
    # drop = FALSE keeps a matrix even when a single unit is non-zero
    points.for.projection[non_zero_rows, ] <- center_weights(
      line.weights[non_zero_rows, , drop = FALSE]
    )
  } else {
    points.for.projection <- center_weights(line.weights)
  }

  colnames(points.for.projection) <- code_columns
  enadata$model$points.for.projection <- data.table::as.data.table(points.for.projection)
  for (i in seq_len(ncol(enadata$model$points.for.projection))) {
    set(
      enadata$model$points.for.projection,
      j = i,
      value = as.ena.co.occurrence(enadata$model$points.for.projection[[i]])
    )
  }
  enadata$model$points.for.projection <- as.ena.matrix(cbind(
    enadata$meta.data,
    enadata$model$points.for.projection
  ), "ena.points")
  #####

  ###
  # Generate and assign the rotation set
  #####
  if (!is.null(rotation.by) && is.null(rotation.set)) {
    rotation <- do.call(rotation.by, list(enadata, rotation.params))
    if (is.null(rotation)) {
      stop("Unable to create a rotation set")
    }

    enadata$rotation.matrix <- as.data.table(rotation$rotation, keep.rownames = "codes")
    for (i in seq_len(ncol(enadata$rotation.matrix))) {
      # First column carries the row names ("codes"); the rest are dimensions
      if (i == 1) {
        set(enadata$rotation.matrix,
            j = i, value = as.ena.metadata(enadata$rotation.matrix[[i]]))
      } else {
        set(enadata$rotation.matrix,
            j = i, value = as.ena.dimension(enadata$rotation.matrix[[i]]))
      }
    }
    class(enadata$rotation.matrix) <- c("ena.rotation.matrix", class(enadata$rotation.matrix))

    enadata$rotation$rotation.matrix <- enadata$rotation.matrix
    enadata$rotation$eigenvalues <- rotation$eigenvalues

    # Store the centering vector: column means over the same rows that were
    # used for centering above.
    if (center.align.to.origin) {
      enadata$rotation$center.vec <- colMeans(line.weights[non_zero_rows, , drop = FALSE])
    } else {
      enadata$rotation$center.vec <- colMeans(line.weights)
    }
  } else if (!is.null(rotation.set)) {
    # `rotation.set` was validated before centering
    enadata$rotation.matrix <- rotation.set$rotation.matrix
    enadata$rotation$rotation.matrix <- rotation.set$rotation.matrix
    enadata$rotation$nodes <- rotation.set$nodes
    enadata$rotation$eigenvalues <- rotation.set$eigenvalues
    enadata$rotation$center.vec <- rotation.set$center.vec
  } else {
    stop("Unable to find or create a rotation set")
  }
  #####

  ###
  # Generate the rotated points
  #####
  if (is.null(enadata$rotation.matrix)) {
    stop(paste0("There is no rotation matrix, if you supplied a custom ",
                "rotation.set, be sure it contains a rotation.matrix"))
  }

  points <- points.for.projection %*% as.matrix(enadata$rotation.matrix)
  points.dt <- as.data.table(points)
  for (i in seq_len(ncol(points.dt))) {
    set(points.dt, j = i, value = as.ena.dimension(points.dt[[i]]))
  }
  if (grepl(x = enadata$model$model.type, pattern = "Trajectory")) {
    enadata$points <- cbind(enadata$trajectories, points.dt)
  } else {
    enadata$points <- cbind(enadata$meta.data, points.dt)
  }
  enadata$points <- as.ena.matrix(enadata$points, "ena.points")
  #####

  ###
  # Calculate node positions
  #  - The supplied method is expected to return a list containing the
  #    keys "node.positions" and "centroids"
  #####
  if (is.null(rotation.set)) {
    # Reaching this point without a rotation set means `rotation` was
    # created (and checked for NULL) above.
    positions <- node.position.method(enadata)

    # Require both keys to be present. The previous test only checked that
    # no unexpected names were returned, so an empty or partial result
    # slipped through and failed later with an unrelated error.
    expected_keys <- c("node.positions", "centroids")
    if (!all(expected_keys %in% names(positions))) {
      stop(paste0("The node position method didn't return back the ",
                  "expected objects:\n",
                  "\tExpected: c('node.positions','centroids')\n",
                  "\tReceived: ", paste(names(positions), collapse = ", ")))
    }

    enadata$rotation$nodes <- as.data.table(positions$node.positions)
    colnames(enadata$rotation$nodes) <- colnames(points)
    rownames(enadata$rotation$nodes) <- enadata$rotation$codes

    for (i in seq_len(ncol(enadata$rotation$nodes))) {
      set(enadata$rotation$nodes, j = i,
          value = as.ena.dimension(enadata$rotation$nodes[[i]]))
    }
    enadata$rotation$nodes <- data.table(
      code = structure(enadata$rotation$codes,
                       class = c("code", class(enadata$rotation$codes))),
      enadata$rotation$nodes
    )
    class(enadata$rotation$nodes) <- c("ena.nodes",
                                       class(enadata$rotation$nodes))

    enadata$model$centroids <- as.data.table(positions$centroids)
    for (i in seq_len(ncol(enadata$model$centroids))) {
      set(enadata$model$centroids, j = i,
          value = as.ena.dimension(enadata$model$centroids[[i]]))
    }
    colnames(enadata$model$centroids) <- colnames(as.matrix(enadata$rotation.matrix))
    enadata$model$centroids <- cbind(
      data.table(unit = enadata$model$unit.labels),
      enadata$model$centroids
    )
    set(enadata$model$centroids, j = 1L,
        value = as.ena.metadata(enadata$model$centroids[[1L]]))
    enadata$model$centroids <- as.ena.matrix(enadata$model$centroids)
  } else {
    enadata$rotation$nodes <- rotation.set$nodes
  }

  if (is.null(enadata$rotation$nodes)) {
    stop("Unable to determine the node positions either by calculating
                    them using `node.position.method` or using a supplied
                    `rotation.set`")
  }
  #####

  ###
  # Variance: share of total variance carried by each rotated dimension
  #####
  var_rot_data <- var(points)
  diagonal_variance <- as.vector(diag(var_rot_data))
  enadata$model$variance <- diagonal_variance / sum(diagonal_variance)
  names(enadata$model$variance) <- colnames(enadata$rotation$rotation.matrix)[-1]
  #####

  enadata$plots <- list()

  enadata$`_function.params`$norm.by <- norm.by
  return(enadata)
}

#' Accumulate Connection Counts for ENA
#'
#' This function takes a data.frame and accumulates co-occurrences of codes within specified units and conversations (horizon), preparing it for ENA. It is designed to be used with pipes (`|>`).
#'
#' @param x A data.frame or similar object containing the data to be analyzed.
#' @param units A character vector specifying the columns that define the units of analysis.
#' @param codes A character vector specifying the columns that contain the codes for co-occurrence analysis.
#' @param horizon A character vector specifying the columns that define the conversational boundaries (horizon).
#' @param ... Additional arguments passed to underlying accumulation functions.
#' @param ordered A logical value. If TRUE, creates ordered networks (A -> B is different from B -> A). Defaults to FALSE.
#' @param binary A logical value. If TRUE, connection counts are binarized (0 or 1). Defaults to TRUE.
#'
#' @return An ena.set object containing the accumulated connection counts and metadata.
#' @export
#'
#' @examples
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon)
#'
accumulate <- function(
    x,
    units = rENA::units(x),
    codes = rENA::codes(x),
    horizon = rENA::horizon(x),
    ...,
    ordered = FALSE,
    binary = TRUE
) {
  args <- list(...)

  # The defaults are promises that read from `x`; evaluate them now, before
  # anything else touches the data.
  force(units)
  force(codes)
  force(horizon)

  # One rule term per horizon column ("<col> %in% UNIT$<col>"), AND-ed
  # together and parsed into a single language object for tma::contexts().
  horizon_terms <- vapply(
    horizon,
    function(cb) paste0(cb, " %in% UNIT$", cb),
    character(1)
  )
  hoo_rules <- list(
    str2lang(paste0("(", paste0(horizon_terms, collapse = " & "), ")"))
  )
  contexts <- tma::contexts(
    x,
    units_by = make.names(units),
    hoo_rules = hoo_rules,
    split_rules = function(unit, unit_context) {
      split(unit_context, by = horizon)
    }
  )

  # Use a caller-supplied tensor when given; otherwise build one, filling in
  # defaults for any option not passed through `...`.
  win_wgts <- if (is.null(args$tensor)) {
    default_window <- if (is.null(args$default_window)) 1 else args$default_window
    default_weight <- if (is.null(args$default_weight)) 1 else args$default_weight
    # Scalar choice: plain if/else rather than the vectorised ifelse()
    mode_column <- if (is.null(args$mode_column)) {
      tma::ATTR_NAMES$CONTEXT_ID
    } else {
      args$mode_column
    }

    tma::context_tensor(
      df = x,
      sender_cols = args$tma_ground_cols,
      receiver_cols = args$tma_response_cols,
      mode_column = mode_column,
      default_window = default_window,
      default_weight = default_weight
    )
  } else {
    args$tensor
  }

  set <- tma::accumulate(
    context_model = contexts,
    tensor = win_wgts,
    codes = make.names(codes),
    ordered = ordered,
    binary = binary
  )

  # Seed the rotation slot. The adjacency key splits every connection column
  # name on "&" into its code names (one column per connection).
  set$rotation <- list(
    rotation.matrix = NULL,
    codes = codes,
    adjacency.key = sapply(
      colnames(as.matrix(set$connection.counts)),
      function(y) strsplit(y, "\\s?&\\s?")[[1]],
      simplify = TRUE
    ),
    node.positions = NULL,
    eigenvalues = NULL,
    centervec = NULL
  )

  return(set)
}

##' Build a Complete ENA Model
#'
#' This function applies a full ENA modeling pipeline to accumulated data. It is a convenience wrapper that chains together normalization, centering, rotation, projection, and optional optimization. Each step can be customized by supplying an alternative function.
#'
#' @param data An `ena.set` object, typically the result of `accumulate()`.
#' @param ... Currently not used by `model()`; pass rotation arguments through `rotate_params` instead.
#' @param normalize A function to normalize the connection counts. Defaults to `sphere_norm`.
#' @param center_with A function to center the normalized data. Defaults to `center`.
#' @param rotate_with A function to perform the rotation (e.g., SVD). Defaults to `rotate`.
#' @param project_with A function to project the points into the rotated space. Defaults to `project`.
#' @param optimize_with A function to optimize node positions. Defaults to `optimize`. Can be set to `NULL` or `FALSE` to skip.
#' @param rotate_fun The specific rotation function to be used by `rotate_with`. Defaults to `ena.rotate.by.generalized`.
#' @param rotate_params A list of additional parameters to pass to the `rotate_fun`.
#' @param exclude_zero_networks A logical value passed to `center_with`. When `TRUE`,
#'   units with all-zero line weights are excluded from the mean computation during
#'   centering (but all units are still shifted by that mean). Defaults to `TRUE`
#'   automatically for ordered sets (`accumulate(ordered = TRUE)`), `FALSE` otherwise.
#' @param center_to_origin A logical value. When `TRUE`, the mean of all projected
#'   points is subtracted from both the points and the node positions after
#'   optimization, placing the centroid of the space at the origin. This is the
#'   default behavior for ONA (Ordered Network Analysis). Defaults to `FALSE`.
#'
#' @return An `ena.set` object with a complete ENA model, including projected points and node positions.
#' @export
#'
#' @examples
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   model()
model <- function(
  data, ...,
  normalize = sphere_norm,
  center_with = center,
  rotate_with = rotate,
  project_with = project,
  optimize_with = optimize,
  # Rotation specific parameters
  rotate_fun = ena.rotate.by.generalized,
  rotate_params = list(),
  # Centering options
  exclude_zero_networks = is(data, "ena.ordered.set"),
  center_to_origin = FALSE
) {
  # Step 1-2: normalize, then center
  fitted <- normalize(data)
  fitted <- center_with(fitted, exclude_zero_networks = exclude_zero_networks)

  # Step 3: rotate. Two or more rotation parameters are flattened with
  # unlist() before being handed over; otherwise the list is passed as is.
  fitted <- if (length(rotate_params) > 1) {
    do.call(
      rotate_with,
      list(fitted, wh = rotate_fun, by = unlist(rotate_params))
    )
  } else {
    rotate_with(fitted, wh = rotate_fun, by = rotate_params)
  }

  # Step 4: project
  fitted <- project_with(fitted)

  # Step 5: optimize, unless disabled with NULL or FALSE
  skip_optimize <- is.null(optimize_with) || isFALSE(optimize_with)
  if (!skip_optimize) {
    fitted <- optimize_with(fitted)
  }

  # Step 6 (optional): shift points and nodes so the point means sit at 0
  if (!isTRUE(center_to_origin) || is.null(fitted$points)) {
    return(fitted)
  }

  # Dimension columns of the points are located as "everything that is not
  # metadata"; node dimensions are located by their dimension class.
  point_dim_idx <- which(!find_meta_cols(fitted$points))
  node_dim_idx  <- which(find_dimension_cols(fitted$rotation$nodes))

  # Per-dimension means, taken before the points are moved
  point_dim_means <- as.list(
    colMeans(fitted$points[, point_dim_idx, with = FALSE])
  )

  # Translate the points (by reference) ...
  fitted$points[
    , c(point_dim_idx) := lapply(.SD, function(v) v - mean(v)),
    .SDcols = point_dim_idx
  ]
  # ... and the nodes by the same vector
  fitted$rotation$nodes[
    , c(node_dim_idx) := Map(`-`, .SD, point_dim_means),
    .SDcols = node_dim_idx
  ]

  return(fitted)
}

##' Apply Spherical Normalization to ENA Data
#'
#' This function applies spherical normalization to the connection counts in an `ena.set` object or to a raw matrix of connection counts. Normalization is a key step before centering and rotation in ENA.
#'
#' @param x An `ena.set` object or a numeric matrix of connection counts.
#' @param add.meta A logical value. If `TRUE` (the default), metadata from the `ena.set` is preserved and included in the output. This parameter is ignored if `x` is a matrix.
#'
#' @return If `x` is an `ena.set`, it returns the modified `ena.set` with a new `line.weights` matrix and an updated `centervec` in the `rotation` object. If `x` is a matrix, it returns a matrix of normalized line weights.
#' @export
#'
#' @examples
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   sphere_norm()
sphere_norm <- function(x, add.meta = TRUE) {
  # Plain matrix-like input: normalize and return a matrix that carries the
  # original column names.
  if (!is(x, "ena.set")) {
    raw_counts <- as.matrix(x)
    normed <- fun_sphere_norm(raw_counts)
    colnames(normed) <- colnames(raw_counts)
    return(normed)
  }

  # ena.set input: the connection counts must be present
  if (is.null(x$connection.counts)) {
    stop("Connection counts are missing.")
  }

  raw_counts <- as.matrix(x$connection.counts)
  metadata <- if (isTRUE(add.meta)) x$meta.data else NULL

  normed <- fun_sphere_norm(raw_counts)
  colnames(normed) <- colnames(raw_counts)

  # Store the line weights (with metadata when requested) and record their
  # column means on the rotation slot.
  x$line.weights <- as_line_weights_matrix(normed, metadata)
  x$rotation$centervec <- colMeans(x$line.weights)

  return(x)
}


# Convert `x` to a data.table whose columns are wrapped with
# as.ena.co.occurrence(), optionally prefixed with `metadata` columns, and
# tagged with the "ena.points" / "ena.matrix" classes.
as_points_matrix <- function(x, metadata = NULL) {
  x_ <- data.table::as.data.table(x)

  # seq_len() rather than seq(): seq(0) is c(1, 0), which would try to touch
  # a non-existent column when `x` has no columns.
  for (i in seq_len(ncol(x_))) {
    data.table::set(
      x_,
      j = i,
      value = as.ena.co.occurrence(x_[[i]])
    )
  }

  if (!is.null(metadata)) {
    x_ <- cbind(metadata, x_)
  }

  class(x_) <- c("ena.points", "ena.matrix", class(x_))

  return(x_)
}

# Convert `x` to a data.table whose columns are wrapped with
# as.ena.co.occurrence(), optionally prefixed with `metadata` columns, and
# tagged with the "ena.line.weights" / "ena.matrix" classes.
as_line_weights_matrix <- function(x, metadata = NULL) {
  line.weights.dt <- data.table::as.data.table(x)

  # seq_len() rather than seq(): seq(0) is c(1, 0), which would try to touch
  # a non-existent column when `x` has no columns.
  for (i in seq_len(ncol(line.weights.dt))) {
    data.table::set(
      line.weights.dt,
      j = i,
      value = as.ena.co.occurrence(line.weights.dt[[i]])
    )
  }

  x_ <- line.weights.dt
  if (!is.null(metadata)) {
    x_ <- cbind(metadata, line.weights.dt)
  }

  # The base classes are taken from the weights table, not from the combined
  # object, so the result is classed like the weights table whatever cbind()
  # returned.
  class(x_) <- c("ena.line.weights", "ena.matrix", class(line.weights.dt))

  return(x_)
}

# Convert a rotation matrix to a classed data.table: the first column is
# wrapped as metadata, every further column as an ENA dimension.
# NOTE(review): this assumes the first column is the "codes" column produced
# from the row names of `x` - confirm that callers always pass a matrix with
# row names.
as_rotation_matrix <- function(x) {
  x_ <- data.table::as.data.table(x, keep.rownames = "codes")

  # seq_len() rather than seq(): seq(0) is c(1, 0), so an empty input (for
  # example NULL) used to fail on a non-existent first column.
  for (i in seq_len(ncol(x_))) {
    if (i == 1) {
      data.table::set(x_, j = i, value = as.ena.metadata(x_[[i]]))
    } else {
      data.table::set(x_, j = i, value = as.ena.dimension(x_[[i]]))
    }
  }
  class(x_) <- c("ena.rotation.matrix", class(x_))

  return(x_)
}

# Build a classed node table: the first element of `rows` becomes the leading
# (metadata) column, the columns of `x` follow as ENA dimensions. When `cols`
# is given, the columns are renamed to c(names(rows), cols). `cls` is
# prepended to the class vector of the result.
as_nodes_matrix <- function(x, rows, cols = NULL, cls = "ena.matrix") {
  row_labels <- rows[[1]]

  nodes_dt <- data.table::data.table(row_labels, x)
  rownames(nodes_dt) <- row_labels

  if (!is.null(cols)) {
    colnames(nodes_dt) <- c(names(rows), cols)
  }

  for (col_idx in seq(ncol(nodes_dt))) {
    # Column 1 holds the labels; everything after it is a dimension
    wrap_column <- if (col_idx == 1) as.ena.metadata else as.ena.dimension
    set(nodes_dt, j = col_idx, value = wrap_column(nodes_dt[[col_idx]]))
  }

  class(nodes_dt) <- c(cls, class(nodes_dt))

  return(nodes_dt)
}

##' Center ENA Data
#'
#' This function centers the line weights of an `ena.set` by subtracting the mean of each connection from all units. This is a standard step in preparing data for rotation.
#'
#' @param x An `ena.set` object (typically after `sphere_norm()`) or a numeric matrix.
#' @param add.meta A logical value. If `TRUE` (the default), metadata is preserved. Ignored if `x` is a matrix.
#' @param exclude_zero_networks A logical value. If `TRUE`, units whose line weights are
#'   all zero are excluded when computing the column means used for centering. The mean
#'   is computed from non-zero units only, but all units (including zero-network ones)
#'   are shifted by that mean. This prevents empty networks from pulling the centroid
#'   toward zero. Defaults to `FALSE` (standard behaviour: all units contribute to
#'   the mean). Use `TRUE` for ordered/directed ENA sets produced by
#'   `accumulate(ordered = TRUE)`.
#'
#' @return If `x` is an `ena.set`, it returns the modified `ena.set` with the centered data stored in `x$model$points.for.projection`. If `x` is a matrix, it returns a centered matrix.
#' @export
#'
#' @examples
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   sphere_norm() |>
#'   center()
center <- function(x, add.meta = TRUE, exclude_zero_networks = FALSE) {
  # Centering variant: column means come from the rows whose sum is non-zero,
  # but every row is shifted by them.
  shift_by_nonzero_means <- function(mat) {
    has_weight <- rowSums(mat) != 0
    if (any(has_weight)) {
      nonzero_means <- colMeans(mat[has_weight, , drop = FALSE])
      return(sweep(mat, 2, nonzero_means, "-"))
    }
    # No row has any weight: fall back to standard centering
    center_data_c(mat)
  }

  # Pick the centering routine once; both branches below use it.
  centering_fun <- if (isTRUE(exclude_zero_networks)) {
    shift_by_nonzero_means
  } else {
    center_data_c
  }

  # Matrix-like input: center and return a matrix with the input's colnames
  if (!is(x, "ena.set")) {
    input_mat <- as.matrix(x)
    centered_mat <- centering_fun(input_mat)
    colnames(centered_mat) <- colnames(input_mat)
    return(centered_mat)
  }

  # ena.set input: line weights are required
  if (is.null(x$line.weights)) {
    stop("Missing line.weights on the provided ENA set. This is typically created using the 'accumulate' and 'sphere_norm' functions.")
  }

  weights <- as.matrix(x$line.weights)

  # A set counts as unordered when it has one column per unordered code pair
  unordered <- ncol(weights) == choose(length(x$rotation$codes), 2)
  connection_names <- apply(
    tma::adjacency_key(x$rotation$codes, unordered),
    2, paste, collapse = " & "
  )

  metadata <- if (isTRUE(add.meta)) x$meta.data else NULL

  centered <- centering_fun(weights)
  colnames(centered) <- connection_names
  x$model$points.for.projection <- as_points_matrix(centered, metadata)

  return(x)
}

#' Rotate ENA Data
#'
#' Rotates ENA data using a specified rotation function (default: SVD), optionally using formulas or grouping variables.
#'
#' @param x An \code{ena.set} object to be rotated.
#' @param ... Optional formulas or additional arguments for rotation.
#' @param wh Function to use for rotation (default: \code{ena.rotate.by.generalized}).
#'
#' @return The rotated \code{ena.set} object with updated rotation matrices.
#' @export
#'
#' @examples
#' # Assuming 'set' is an ena.set object:
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   sphere_norm() |>
#'   center() |>
#'   rotate()
rotate <- function(
  x,
  ...,
  wh = ena.rotate.by.generalized
) {
  dot_args <- list(...)

  if (is(x, "ena.set")) {
    # Make sure points.for.projection exists
    if (is.null(x$model$points.for.projection)) {
      stop("Missing `points.for.projection` on the provided ENA set. This is typically created using ?center()")
    }
  } else {
    # Construct an ENAset-like list from the provided matrix; the code names
    # are recovered by splitting the column names on " & ".
    points_ <- as.matrix(x)
    codes_ <- unique(unlist(strsplit(colnames(points_), " & ")))

    x <- list(
      model = list(
        points.for.projection = points_
      ),
      rotation = list(
        codes = codes_
      )
    )
  }

  # Flag each argument in `...` that can be coerced to a formula
  dot_formulas <- vapply(
    dot_args,
    function(d) {
      tryCatch(
        {
          as.formula(d)
          TRUE
        },
        error = function(e) FALSE
      )
    },
    logical(1)
  )

  by_vals <- NULL
  if (any(dot_formulas)) {
    if (!all(dot_formulas)) {
      stop("If rotating using a formula, all must be formulas")
    }

    # Formula rotation: the formulas become the x/y rotation variables.
    wh <- ena.rotate.by.hena.regression_2
    by_vals <- list(params = dot_args)
    # Name by position over the formulas themselves. Indexing over `by_vals`
    # (always length 1) left the second formula without the `y_var` name.
    names(by_vals$params) <- c("x_var", "y_var")[seq_along(by_vals$params)]
  } else {
    # No formulas: use explicitly supplied params, or fall back to the first
    # metadata column of the connection counts.
    if (!is.null(dot_args$params)) {
      by_vals <- dot_args$params
    } else if (!is.null(dot_args$by$params)) {
      by_vals <- dot_args$by$params
    } else {
      by_vals <- list(
        x_var = NULL,
        y_var = NULL
      )

      # NOTE(review): this fallback reads x$connection.counts, which the
      # matrix branch above does not create - confirm matrix input is only
      # used together with explicit params.
      first_meta <- setdiff(colnames(x$connection.counts)[find_meta_cols(x$connection.counts)], c("QEUNIT", "ENA_UNIT"))[1]
      by_vals$x_var <- x$connection.counts[, ..first_meta, drop = FALSE]
    }
  }

  x$rotation <- do.call(wh, list(enaset = x, params = by_vals))

  # Ensure x$rotation is a list with required elements
  if (!is.list(x$rotation)) {
    stop("Rotation function did not return a list as expected.")
  }

  # Only extract elements that exist in the returned list
  rotation_elements <- c("eigenvalues", "codes", "node.positions", "rotation")
  x$rotation <- x$rotation[intersect(rotation_elements, names(x$rotation))]

  # Exact lookup with [[ ]]: `$` partially matches list names, so after the
  # raw rotation is dropped `x$rotation$rotation` would silently resolve to
  # `rotation.matrix`.
  raw_rotation <- x$rotation[["rotation"]]
  if (!is.null(raw_rotation)) {
    x$rotation.matrix <- as_rotation_matrix(raw_rotation)
    x$rotation$rotation.matrix <- x$rotation.matrix
    x$rotation$rotation <- NULL
  } else {
    x$rotation.matrix <- NULL
    x$rotation$rotation.matrix <- NULL
  }
  # A second, unconditional conversion used to follow here. It re-wrapped the
  # matrix built above and failed outright when no rotation was returned.

  return(x)
}

##' Project ENA Points onto Rotated Space
#'
#' This function projects ENA points onto the rotated space using the rotation matrix.
#' Optionally, metadata can be included in the resulting points matrix.
#'
#' @param x An \code{ena.set} object containing the points for projection and rotation matrix.
#' @param rotation Optional. A rotation matrix to use for projection if \code{x} is not an \code{ena.set}.
#' @param add.meta Logical. If \code{TRUE} (default), metadata will be included in the output.
#'
#' @return The input \code{ena.set} object with the projected points matrix (and metadata if requested).
#' @export
#'
#' @examples
#' # Assuming 'set' is an ena.set object:
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   sphere_norm() |>
#'   center() |>
#'   rotate() |>
#'   project()
project <- function(x, rotation = NULL, add.meta = TRUE) {
  # Matrix-like input: a rotation must be supplied; return the raw product
  if (!is(x, "ena.set")) {
    if (is.null(rotation)) {
      stop("When providing a matrix, a rotation matrix must also be provided")
    }
    return(as.matrix(x) %*% as.matrix(rotation))
  }

  # ena.set input: multiply the centered points by the stored rotation matrix
  projected <- as.matrix(x$model$points.for.projection) %*%
    as.matrix(x$rotation.matrix)

  metadata <- if (isTRUE(add.meta)) x$meta.data else NULL
  x$points <- as_points_matrix(projected, metadata)

  # Share of the total variance carried by each projected dimension, named
  # after the rotation matrix columns (minus its leading column).
  dimension_variance <- as.vector(diag(var(projected)))
  x$model$variance <- dimension_variance / sum(dimension_variance)
  names(x$model$variance) <- colnames(x$rotation$rotation.matrix)[-1]

  return(x)
}


##' Optimize Node and Centroid Positions in ENA Set
#'
#' This function computes and assigns node positions and centroids for an ENA set object
#' using the current points and rotation information.
#'
#' @param x An \code{ena.set} object for which to optimize node and centroid positions.
#' @param weights Optional. A numeric matrix of connection weights. If provided, the function will use this matrix instead of the connection counts from the \code{ena.set}.
#'
#' @return The input \code{ena.set} object with updated node and centroid positions.
#' @export
#'
#' @examples
#' # Assuming 'set' is an ena.set object:
#' data(RS.data)
#'
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   sphere_norm() |>
#'   center() |>
#'   rotate() |>
#'   project() |>
#'   optimize()
optimize <- function(x, weights = NULL) {
  # A bare points object is wrapped in a minimal list exposing the same
  # fields the rest of the function reads from an ena.set.
  if (!is(x, "ena.set")) {
    if (is.null(weights)) {
      stop("When providing a matrix, weights must also be provided")
    }

    # Code names are recovered from the "A & B" style weight column names.
    pair_labels <- colnames(as.matrix(weights))
    x <- list(
      points = x,
      line.weights = weights,
      rotation = list(
        codes = unique(unlist(strsplit(pair_labels, " & ")))
      )
    )
  }

  point_mat <- as.matrix(x$points)
  weight_mat <- as.matrix(x$line.weights)
  axis_names <- colnames(point_mat)

  # Ordered sets use the directed solver, everything else the least-squares
  # solver; both are called with the same arguments.
  solve_positions <- if (is(x, "ena.ordered.set")) {
    directed_node_positions
  } else {
    lws_lsq_positions
  }
  positions <- solve_positions(weight_mat, point_mat, ncol(point_mat))

  x$rotation$nodes <- as_nodes_matrix(
    positions$nodes,
    list("code" = x$rotation$codes),
    cols = axis_names,
    cls = "ena.nodes"
  )
  x$model$centroids <- as_nodes_matrix(
    positions$centroids,
    rows = list("ENA_UNIT" = x$points$ENA_UNIT),
    cols = axis_names
  )

  x
}

#####
#' @title Plot of ENA trajectories
#'
#' @description Function used to plot trajectories
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param points dataframe or matrix - first two columns are X and Y coordinates, each row is a point in a trajectory
#' @param by vector used to subset points into individual trajectories, length nrow(points)
#' @param names character vector - labels for each trajectory of points, length length(unique(by))
#' @param labels character vector - point labels, length nrow(points)
#' @param labels.show A character choice: Always, Hover, Both.  Default: Always
# @param confidence.interval A character that determines which confidence interval type to use, choices: none, box, crosshair, default: none
# @param outlier.interval A character that determines which outlier interval type to use, choices: none, box, crosshair, default: none
# @param confidence.interval.values A matrix/dataframe where columns are CI x and y values for each point
# @param outlier.interval.values A matrix/dataframe where columns are OI x and y values for each point
#' @param colors A character vector, that determines marker color, default NULL results in
#' alternating random colors. If single color is supplied, it will be used for all
#' trajectories, otherwise the length of the supplied color vector should be equal
#' to the length of the supplied names (i.e a color for each trajectory being plotted)
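#' @param shape A character which determines the shape of markers, choices: circle, square, triangle-up, diamond, default: circle
#' @param label.offset A character vector of label positions relative to the points, one of "top left", "top center", "top right", "middle left", "middle center", "middle right", "bottom left", "bottom center", "bottom right". A single value is used for every trajectory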
#' @param label.font.size An integer which determines the font size for labels, default: enaplot$font.size
#' @param label.font.color A character which determines the color of label font, default: enaplot$font.color
#' @param label.font.family A character which determines font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
#' @param default.hidden A logical indicating if the trajectories should start hidden (click on the legend to show them) Default: FALSE
#'
#' @seealso \code{\link{ena.plot}}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("GroupName","ActivityNumber")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post","C.Change")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4,
#'   model = "A"
#' );
#'
#' set = ena.make.set(accum);
#'
#' ### get mean network plots
#' first.game.lineweights = as.matrix(set$line.weights$Condition$FirstGame)
#' first.game.mean = colMeans(first.game.lineweights)
#'
#' second.game.lineweights = as.matrix(set$line.weights$Condition$SecondGame)
#' second.game.mean = colMeans(second.game.lineweights)
#'
#' subtracted.network = first.game.mean - second.game.mean
#'
#' # Plot dimension 1 against ActivityNumber metadata
#' dim.by.activity = cbind(
#'     as.matrix(set$points)[,1],
#'     set$trajectories$ActivityNumber * .8/14-.4  #scale down to dimension 1
#' )
#'
#' plot = ena.plot(set)
#' plot = ena.plot.network(plot, network = subtracted.network, legend.name="Network")
#' plot = ena.plot.trajectory(
#'   plot,
#'   points = dim.by.activity,
#'   names = unique(set$model$unit.label),
#'   by = set$trajectories$ENA_UNIT
#' );
#' print(plot)
#'
#' @return The \code{\link{ENAplot}} provided to the function, with its plot updated to include the trajectories
#####
ena.plot.trajectory = function(
  enaplot,
  points,
  by = NULL,
  labels = NULL, #unique(enaplot$enaset$enadata$units),
  labels.show = c("Always","Hover","Both"),
  names = NULL,
  label.offset = NULL,
  label.font.size = enaplot$get("font.size"),
  label.font.color = enaplot$get("font.color"),
  label.font.family = c("Arial", "Courier New", "Times New Roman"),
  shape = c("circle", "square", "triangle-up", "diamond"),
  colors = NULL,
  default.hidden = F
) {
  # Fall back to the plot-wide font family when no character family is given.
  # (This branch used to overwrite label.font.size with the family name.)
  # NOTE(review): when the argument is left at its default, the full
  # three-element choice vector is still forwarded to plotly -- confirm
  # whether a single family should be selected instead.
  if (!is.character(label.font.family)) {
    label.font.family <- enaplot$get("font.family")
  }
  labels.show <- match.arg(labels.show)
  shape <- match.arg(shape)

  # Without a grouping vector, every point belongs to a single trajectory.
  if (is.null(by)) {
    by <- list(all = rep(TRUE, nrow(points)))
  }
  if (!is(points, "data.table")) {
    points <- data.table::as.data.table(points)
  }

  # Lines and markers are always drawn. Label text is added to the trace
  # mode and/or the hover box only when labels.show asks for it, so that
  # "Hover" no longer also draws the labels permanently.
  mode <- "lines+markers"
  hoverinfo <- "x+y"
  tbl <- data.table::data.table(points)
  if (!is.null(labels)) {
    if (labels.show %in% c("Always", "Both")) {
      mode <- paste0(mode, "+text")
    }
    if (labels.show %in% c("Hover", "Both")) {
      hoverinfo <- paste0(hoverinfo, "+text")
    }
    tbl <- data.table::data.table(points, labels = labels)
  }

  # `by` is never NULL at this point (see above), so grouping always applies.
  if (is.character(by) && length(by) == nrow(tbl)) {
    by <- as.factor(by)
  }
  # One row per trajectory; `lines` holds that trajectory's points.
  dfdt_trajs <- tbl[, { data.table::data.table(lines = list(.SD)) }, by = by]
  n_trajectories <- nrow(dfdt_trajs)

  valid_label_offsets <- c(
    "top left", "top center", "top right",
    "middle left", "middle center", "middle right",
    "bottom left", "bottom center", "bottom right"
  )
  unknown_offsets <- label.offset[!(label.offset %in% valid_label_offsets)]
  if (length(unknown_offsets) > 0) {
    stop(sprintf(
      "Unrecognized label.offsets: %s",
      paste(unique(unknown_offsets), collapse = ", ")
    ))
  }
  if (length(label.offset) == 1) {
    label.offset <- rep(label.offset, n_trajectories)
  }

  # A colour vector must pair up with the supplied names; a single colour is
  # spread over every trajectory. It used to be replicated to length(names),
  # which silently produced NA colours whenever names was NULL or too short.
  if (length(colors) > 1 && length(colors) != length(names)) {
    stop("Length of the colors must be 1 or the same length as by")
  }
  if (length(colors) == 1) {
    colors <- rep(colors, n_trajectories)
  }

  # seq_len() keeps the loop from running on an empty trajectory table.
  for (i in seq_len(n_trajectories)) {
    traj <- dfdt_trajs$lines[[i]]
    d <- remove_meta_data(traj)
    d.names <- colnames(d)
    trace_color <- if (!is.null(colors)) colors[i] else NULL

    enaplot$plot <- plotly::add_trace(
      enaplot$plot,
      data = d,
      # the first two remaining columns are used as the X and Y coordinates
      x = as.formula(paste0("~", d.names[1])),
      y = as.formula(paste0("~", d.names[2])),
      name = names[i],
      mode = mode,
      text = traj$labels,
      textposition = label.offset[i],
      hoverinfo = hoverinfo,
      showlegend = TRUE,
      line = list(
        color = trace_color
      ),
      marker = list(
        symbol = shape,
        color = trace_color
      ),
      textfont = list(
        family = label.font.family,
        size = label.font.size,
        color = label.font.color
      ),
      visible = if (isTRUE(default.hidden)) "legendonly" else TRUE
    )
  }

  # Record what was drawn on the plot object.
  enaplot$plotted$trajectories[[
    length(enaplot$plotted$trajectories) + 1
  ]] <- dfdt_trajs

  return(enaplot)
}

#' @title hENA rotation for ENA
#'
#' @description hENA rotation function.
#'
#' @param enaset ena set
#' @param params list of parameters
#'
#' @return ena set
#' @export
ena.rotation.h <- function(
  enaset,
  params
) {
  # check arguments
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }
  x_var <- params$x_var
  y_var <- params$y_var
  control_vars <- params$control_vars
  centering <- ifelse(!is.null(params$centering), params$centering, TRUE)
  include_xy <- ifelse(!is.null(params$include_xy), params$include_xy, FALSE)
  formula <- params$formula

  # get centered data (work on a copy so the set itself is not modified)
  if (!is.null(enaset$model$points.for.projection)) {
    data <- data.table::copy(enaset$model$points.for.projection)
  } else {
    data <- data.table::copy(enaset$points.normed.centered)
  }

  # Prep: mean-center every value column
  value_vars <- colnames(as.matrix(data))
  data.table::set(
    x = data, j = value_vars,
    value = data[, lapply(.SD, function(x) x - mean(x)), .SDcols = value_vars]
  )

  # dummy code x_var (non-numeric values become 0-based run ids)
  if (!is.numeric(data[[x_var]])) {
    x_var_f <- paste0(x_var, "_f")
    data[[x_var_f]] <- data.table::rleidv(x = data, cols = x_var) - 1
    x_var <- x_var_f
  }

  # dummy code y_var
  if (!is.null(y_var) && !is.numeric(data[[y_var]])) {
    y_var_f <- paste0(y_var, "_f")
    data[[y_var_f]] <- data.table::rleidv(x = data, cols = y_var) - 1
    y_var <- y_var_f
  }
  both_vars <- c(x_var, y_var)
  n_targets <- length(both_vars)

  # centering x_var and y_var
  if (centering) {
    data[, c(both_vars) := lapply(.SD, function(x) x - mean(x)), .SDcols = c(both_vars)]
  }

  # prepare regression formula (a user-supplied formula replaces it entirely)
  f <- paste(c(both_vars, control_vars), collapse = " + ")

  if (include_xy) {
    xy_var <- paste(both_vars, collapse = "_")
    data[[xy_var]] <- data[[x_var]] * data[[y_var]]
    f <- paste(c(f, xy_var), collapse = " + ")
  }

  if (!is.null(formula)) {
    f <- formula
  }

  # run regression models and get slope variables: one regression per value
  # column, keeping the coefficients that follow the intercept. vapply pins
  # the result shape regardless of how many value columns there are.
  slopes <- vapply(value_vars, function(value_var) {
    model_formula <- as.formula(paste0("data$`", value_var, "` ~ ", f))
    unname(lm(model_formula, data = data)$coefficients[seq_len(n_targets) + 1])
  }, numeric(n_targets))
  v <- matrix(slopes, ncol = n_targets, byrow = TRUE)

  # Prep deflation. `with = FALSE` selects the columns named in value_vars
  # without the `..value_vars` prefix, which needed a dummy
  # `'..value_vars' = NULL` binding and made data.table warn that both names
  # exist in the calling scope.
  R <- NULL
  kept_names <- character(0)
  A <- as.matrix(data[, value_vars, with = FALSE])

  # Normalize x rotation vector
  v1 <- v[, 1, drop = FALSE]
  norm_v1 <- sqrt(sum(v1 * v1))
  if (norm_v1 != 0) {
    v1 <- v1 / norm_v1
    R <- v1
    kept_names <- c(kept_names, paste("x", x_var, sep = "_"))
  }
  defA <- as.matrix(A) - as.matrix(A) %*% v1 %*% t(v1)

  # Normalize y rotation vector, if applicable (orthogonalized against v1)
  v2 <- NULL
  if (!is.null(y_var)) {
    v2 <- v[, 2]
    v2 <- as.numeric(v2) - as.numeric(t(v2) %*% v1) * v1
    norm_v2 <- sqrt(sum(v2 * v2))

    if (norm_v2 != 0) {
      v2 <- v2 / norm_v2
      if (is.null(R)) {
        R <- matrix(c(v2), ncol = 1)
      } else {
        R <- matrix(c(R, v2), ncol = 2)
      }
      # Label by which vector was kept, so a retained y axis is never
      # reported as "x_..." when the x vector had zero norm.
      kept_names <- c(kept_names, paste("y", y_var, sep = "_"))
    }

    defA <- defA - defA %*% v2 %*% t(v2)
  }

  # get svd for deflated points
  svd_result <- prcomp(defA, retx = FALSE, scale = FALSE, center = FALSE, tol = 0)
  svd_v <- svd_result$rotation

  # Merge rotation vectors. R is NULL when every regression vector had zero
  # norm; ncol(NULL) is NULL, so count explicitly and fall back to pure SVD.
  vcount <- if (is.null(R)) 0L else ncol(R)
  n_svd <- max(ncol(svd_v) - vcount, 0L)
  combined <- cbind(R, svd_v[, seq_len(n_svd), drop = FALSE])

  colnames(combined) <- c(
    kept_names,
    # sprintf() yields character(0) when no SVD columns remain
    sprintf("SVD%d", vcount + seq_len(n_svd))
  )

  # put into ENARotationSet
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = svd_result$sdev ^ 2
  )

  # Done
  return(rotation_set)
}

plot_nodes <- function(...) {
  # Adds the node layer (black markers sized by node weight, with optional
  # text labels) to the plot. Every free variable used here -- enaplot, nodes,
  # mode, labels, label.*, rows.to.keep, legend.name, show.legend -- is looked
  # up in the environment this function has been bound to by its caller.
  node_marker <- list(
    color = "#000000",
    size = abs(nodes$weight),
    line = list(width = 0)
  )
  node_font <- list(
    family = label.font.family,
    size = label.font.size,
    color = label.font.color
  )

  enaplot$plot <- plotly::add_trace(
    enaplot$plot,
    type = "scatter",
    data = nodes,
    x = ~X1,
    y = ~X2,
    mode = mode,
    # offsets and labels are restricted to the nodes that are still shown
    textposition = label.offset[rows.to.keep],
    marker = node_marker,
    textfont = node_font,
    text = labels[rows.to.keep],
    legendgroup = legend.name,
    name = legend.name,
    showlegend = show.legend,
    hoverinfo = "none"
  )

  enaplot$plot
}

plot_edges <- function(...) {
  # Adds one line trace per edge shape to the plot. The free variables
  # enaplot, network.edges.shapes, legend.include.edges and legend.name are
  # looked up in the environment this function has been bound to by its caller.
  if (length(network.edges.shapes) == 0) {
    return(enaplot$plot)
  }

  # Remember which edge shapes were drawn for this network.
  enaplot$plotted$networks[[length(enaplot$plotted$networks) + 1]] <- network.edges.shapes

  for (edge in network.edges.shapes) {
    edge_label <- paste(edge$nodes[1], edge$nodes[2], sep = ".")

    # Edges get their own named legend entry only when requested.
    legend_entry <- if (legend.include.edges) {
      list(name = edge_label, show = TRUE)
    } else {
      list(name = NULL, show = FALSE)
    }
    endpoints <- data.frame(
      X1 = c(edge$x0, edge$x1),
      X2 = c(edge$y0, edge$y1)
    )

    enaplot$plot <- plotly::add_trace(
      enaplot$plot,
      type = "scatter",
      mode = "lines",
      data = endpoints,
      x = ~X1, y = ~X2,
      line = edge$line,
      opacity = edge$opacity,
      legendgroup = if (legend.include.edges == TRUE) edge_label else legend.name,
      showlegend = legend_entry$show,
      name = legend_entry$name
    )
  }

  enaplot$plot
}

##
#' @title Plot an ENA network
#'
#' @description Plot an ENA network: nodes and edges
#'
#' @details Plots a network graph, including nodes (taken from codes in the ENAplot) and the edges (provided in network)
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param network dataframe or matrix containing the edge weights for the network graph; typically comes from ENAset$line.weights
#' @param node.positions matrix containing the positions of the nodes. Defaults to enaplot$enaset$rotation$nodes
#' @param adjacency.key matrix containing the adjacency key for looking up the names and positions
#' @param colors A String or vector of colors for positive and negative line weights. E.g. red or c(pos= red, neg = blue), default: c(pos= red, neg = blue)
#' @param edge_type A String representing the type of line to draw, either "line", "dash", or "dot"
#' @param show.all.nodes A Logical variable, default: true
#' @param threshold A vector of numeric min/max values, default: c(0, Inf). Edge weights below the min value will not be displayed; edge weights above the max value will be shown at the max value.
#' @param thin.lines.in.front A logical, default: true
#' @param layers ordering of layers, default: c("nodes", "edges")
#' @param thickness A vector of numeric min/max values for thickness, default:  c(min(abs(network)), max(abs(network)))
#' @param opacity A vector of numeric min/max values for opacity, default: thickness
#' @param saturation A vector of numeric min/max values for saturation, default: thickness
#' @param scale.range A vector of numeric min/max to scale from, default: c(0.1,1) or if min(network) is 0, c(0,1)
#' @param node.size A lower and upper bound used for scaling the size of the nodes, default c(3, 10)
#' @param labels A character vector of node labels, default: code names
#' @param label.offset A character vector representing the positional offset relative to the respective node. Defaults to "middle right" for all nodes. If a single value is provided, it is used for all positions; otherwise its length must equal the number of labels
#' @param label.font.size An integer which determines the font size for graph labels, default: enaplot$font.size
#' @param label.font.color A character which determines the color of label font, default: enaplot$font.color
#' @param label.font.family A character which determines font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
#' @param legend.name A character name used in the plot legend. Not included in legend when NULL (Default), if legend.include.edges is TRUE will always be "Nodes"
#' @param legend.include.edges Logical value indicating if the edge names should be included in the plot legend. Forces legend.name to be "Nodes"
#' @param scale.weights Logical indicating to scale the supplied network
#' @param ... Additional parameters
#'
#' @seealso \code{\link{ena.plot}}, \code{\link{ena.plot.points}}
#' @importFrom scales rescale

#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'     accum$meta.data$Condition=="FirstGame",
#'     accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' plot = ena.plot(set)
#'
#' ### Subset rotated points and plot Condition 1 Group Mean
#' as.matrix(set$points$Condition$FirstGame)
#'
#' first.game.points = as.matrix(set$points$Condition$FirstGame)
#' plot = ena.plot.group(plot, first.game.points, labels = "FirstGame",
#'     colors = "red", confidence.interval = "box")
#'
#' ### Subset rotated points and plot Condition 2 Group Mean
#' second.game.points = as.matrix(set$points$Condition$SecondGame)
#' plot = ena.plot.group(plot, second.game.points, labels = "SecondGame",
#'     colors  = "blue", confidence.interval = "box")
#'
#' ### get mean network plots
#' first.game.lineweights = as.matrix(set$line.weights$Condition$FirstGame)
#' first.game.mean = colMeans(first.game.lineweights)
#'
#' second.game.lineweights = as.matrix(set$line.weights$Condition$SecondGame)
#' second.game.mean = colMeans(second.game.lineweights)
#'
#' subtracted.network = first.game.mean - second.game.mean
#' plot = ena.plot.network(plot, network = subtracted.network)
#' print(plot)
#'
#' @return The \code{\link{ENAplot}} provided to the function, with its plot updated to include the nodes and provided connecting lines.
##
ena.plot.network = function(
  enaplot = NULL,
  network = NULL,
  node.positions = enaplot$enaset$rotation$nodes,
  adjacency.key = NULL, #enaplot$enaset$enadata$adjacency.matrix,
  colors = c(pos=enaplot$palette[1], enaplot$palette[2]),
  edge_type = "line", #c("line", "dash", "dot"),
  show.all.nodes = T,
  threshold = c(0),
  thin.lines.in.front = T,
  layers = c("nodes", "edges"),

  # thickness/opacity/saturation/scale.range defaults are lazy: they are
  # computed from `network` at first use, i.e. after any coercion done below.
  thickness = c(min(abs(network)), max(abs(network))),
  opacity = thickness,
  saturation = thickness,
  scale.range = c(ifelse(min(network)==0, 0, 0.1), 1),

  node.size = c(3,10),

  labels = NULL,
  label.offset = "middle right",
  label.font.size = enaplot$get("font.size"),
  label.font.color = enaplot$get("font.color"),
  label.font.family = enaplot$get("font.family"),
  legend.name = NULL,
  legend.include.edges = F,
  scale.weights = F,
  ...
) {
  # The network must hold one weight per unordered pair of nodes.
  expected_codes <- choose(nrow(node.positions), 2)
  if(expected_codes != length(network)) {
    # browser()
    # A data.frame with the expected number of columns is flattened to a
    # plain vector; any other length mismatch is an error.
    if(is.data.frame(network) && ncol(as.matrix(network)) == expected_codes) {
      network = as.vector(as.matrix(network))
    }
    else {
      stop(paste0("Network vector needs to be of length ", choose(nrow(node.positions), 2)))
    }
  }

  # Resolve node names, the adjacency key and the default labels, depending on
  # whether node.positions is an "ena.nodes" object or a plain matrix/frame.
  node.rows <- NULL
  if(is(node.positions, "ena.nodes")) {
    if(is.null(adjacency.key)) {
      adjacency.key <- namesToAdjacencyKey(node.positions$code)
    }
    node.rows <- node.positions$code

    if(is.null(labels)) {
      labels <- node.positions$code
    }
  }
  else {
    if(is.matrix(node.positions)) {
      node.positions <- as.data.frame(node.positions)
    }
    # NOTE(review): in this branch a caller-supplied adjacency.key is always
    # overwritten with one derived from the row names -- confirm this is intended.
    adjacency.key <- namesToAdjacencyKey(rownames(node.positions))
    node.rows <- rownames(node.positions)
    if(is.null(labels)) {
      labels  <- rownames(node.positions)
    }
  }
  args = list(...);
  network.edges.shapes = list();
  edge_type = match.arg(arg = edge_type, choices = c("line", "dash", "dot"));

  # Working copy of the node coordinates with generic column names X1, X2, ...
  # `weight` accumulates the thickness of the edges touching each node.
  nodes = data.frame(as.matrix(node.positions));
  colnames(nodes) = paste0("X", seq(colnames(nodes)))
  nodes$weight = rep(0, nrow(nodes))
  nodes$color = "black";

  # Handle label parameters
  if(length(label.offset) == 1) {
    label.offset = rep(label.offset[1], length(labels))
  }
  if(length(label.offset) != length(labels)) {
    stop("length(label.offset) must be equal to 1 or length(labels)")
  }

  # Handle legend parameters
  if(legend.include.edges == T && !is.null(legend.name)) {
    legend.name = "Nodes"
  }

  # Apply the min/max threshold to a copy of the network.
  network.scaled = network;
  if(!is.null(threshold)) {
    # +1 for non-negative weights, -1 for negative ones; used to restore the
    # sign after clamping to the maximum below.
    multiplier.mask = ((network.scaled >= 0) * 1) - ((network.scaled < 0) * 1)
    # A single threshold value is treated as the minimum with no upper bound.
    if(length(threshold) == 1) {
      threshold[2] = Inf;
    }
    else if(threshold[2] < threshold[1]) {
      stop("Minimum threshold value must be less than the maximum value.");
    }

    # Weights whose magnitude is below the minimum are zeroed out.
    if(threshold[1] > 0) {
      # network.scaled = network.scaled[sizes > threshold[1]]
      network.scaled[abs(network.scaled) < threshold[1]] = 0
    }
    # Weights whose magnitude exceeds the maximum are clamped to it, keeping
    # their original sign.
    if(threshold[2] < Inf && any(abs(network.scaled) > threshold[2]))  {
      to.threshold = abs(network.scaled) > threshold[2]
      network.scaled[to.threshold] = threshold[2]
      network.scaled[to.threshold] = network.scaled[to.threshold] * multiplier.mask[to.threshold]
    }
  }
  network.thickness = abs(network.scaled);
  network.saturation = abs(network.scaled);
  network.opacity = abs(network.scaled);

  # 1 where the ORIGINAL network weight is non-zero, 0 elsewhere.
  network.to.keep = (network != 0) * 1
  if(scale.weights == T) {
    # NOTE(review): scaling restarts from the original `network`, so the
    # thresholding applied to network.scaled above is discarded here -- confirm.
    network.scaled = network * (1 / max(abs(network)));
    network.thickness = scales::rescale(x = abs(network.scaled), to = scale.range, from = thickness);
  }
  network.scaled = network.scaled * network.to.keep
  network.thickness = network.thickness * network.to.keep

  # Saturation and opacity are always rescaled into scale.range.
  network.saturation = scales::rescale(x = abs(network.scaled), to = scale.range, from = saturation);
  network.opacity = scales::rescale(x = abs(network.scaled), to = scale.range, from = opacity);

  pos.inds = as.numeric(which(network.scaled >=0));
  neg.inds = as.numeric(which(network.scaled < 0));

  # One column of hue/saturation/value per supplied colour.
  colors.hsv = rgb2hsv(col2rgb(colors))

  # With a single colour, a second one is derived for negative weights by
  # shifting the hue by 0.5 (wrapping past 1) and reusing saturation and value.
  if(ncol(colors.hsv) == 1) {
    colors.hsv[[4]] = colors.hsv[1] + 0.5;
    if(colors.hsv[4] > 1) {
      colors.hsv[4] = colors.hsv[4] - 1;
    }

    colors.hsv[[5]] = colors.hsv[2];
    colors.hsv[[6]] = colors.hsv[3];
    dim(colors.hsv) = c(3,2);
  }

  # Build one edge shape per network entry; column i of the adjacency key
  # names the two nodes joined by weight i.
  mat = as.matrix(adjacency.key);
  for (i in 1:length(network)) {
    v0 <- nodes[node.rows==mat[1,i], ];
    v1 <- nodes[node.rows==mat[2,i], ];
    # Both endpoints accumulate this edge's thickness as node weight.
    nodes[node.rows==mat[1,i],]$weight = nodes[node.rows==mat[1,i],]$weight + abs(network.thickness[i]);
    nodes[node.rows==mat[2,i],]$weight = nodes[node.rows==mat[2,i],]$weight + abs(network.thickness[i]);

    # First colour for non-negative weights, second for negative ones; the
    # saturation component is replaced by the edge's rescaled saturation.
    color = NULL
    if(i %in% pos.inds) {
      color = colors.hsv[,1];
    } else {
      color = colors.hsv[,2];
    }
    color[2] = network.saturation[i];

    edge_shape = list(
      type = "line",
      opacity = network.opacity[i],
      nodes = c(mat[,i]),
      line = list(
        name = "test",
        color= hsv(color[1],color[2],color[3]),
        width= abs(network.thickness[i]) * enaplot$get("multiplier"),
        dash = edge_type
      ),
      # endpoints use the first two coordinate columns of each node
      x0 = as.numeric(v0[1]),
      y0 = as.numeric(v0[2]),
      x1 = as.numeric(v1[1]),
      y1 = as.numeric(v1[2]),
      layer = "below",
      size = as.numeric(abs(network.scaled[i]))
    );
    network.edges.shapes[[i]] = edge_shape
  };

  # Order the edge list by size: largest first (thinnest last) when thin lines
  # should be in front, smallest first otherwise.
  if(thin.lines.in.front) {
    network.edges.shapes = network.edges.shapes[rev(order(sapply(network.edges.shapes, "[[", "size")))]
  }
  else {
    network.edges.shapes = network.edges.shapes[order(sapply(network.edges.shapes, "[[", "size"))]
  }

  # Optionally drop nodes that no edge contributed any weight to.
  rows.to.keep = rep(T, nrow(nodes))
  if(show.all.nodes == F) {
    rows.to.keep = nodes$weight != 0
    # nodes = nodes[rownames(nodes) %in% unique(as.character(sapply(network.edges.shapes, "[[", "nodes"))), ]
  }
  nodes = nodes[rows.to.keep,];
  # Labels are drawn unless `labels.hide = TRUE` was passed through `...`.
  mode = "markers+text"
  if(!is.null(args$labels.hide) && args$labels.hide == T) {
    mode="markers"
  }
  # Node weights are normalised by the largest weight and mapped into
  # node.size; if no node has weight, all nodes get the upper node.size bound.
  if( any(nodes$weight > 0)) {
    nodes$weight = scales::rescale((nodes$weight * (1 / max(abs(nodes$weight)))), node.size) # * enaplot$get("multiplier"));
  }
  else {
    nodes$weight = node.size[2]
  }

  # A legend entry is shown when a legend name exists or edges are included.
  show.legend = !is.null(legend.name);
  if(legend.include.edges) {
    if(is.null(legend.name)) {
      legend.name = "Nodes"
    }
    show.legend = T;
  }

  # browser()
  # Create local copies of the layer helpers bound to this call's environment
  # so their free variables (nodes, mode, labels, rows.to.keep, ...) resolve
  # to the locals computed above.
  environment(plot_nodes) <- environment()
  environment(plot_edges) <- environment()

  # Each requested layer name is dispatched to the matching plot_<layer> helper.
  for(layer in layers) {
    enaplot$plot <- do.call(what = paste0("plot_", layer), args = list())
  }

  enaplot
}

###
#' @title ENA Rotate by generalized means rotation (GMR)
#'
#' @description Computes a dimensional reduction from a matrix of ENA points
#'   such that the first dimension best represents the contribution of a target
#'   variable after controlling for covariates via Lasso. An optional second
#'   GMR axis can be computed for \code{y_var}; remaining dimensions are filled
#'   by SVD of the doubly-deflated space. Delegates to
#'   \code{\link[libqe]{generalized_means_rotation}}.
#'
#' @param enaset An \code{\link{ENAset}} or compatible list with
#'   \code{model$points.for.projection} (or \code{points.normed.centered}),
#'   \code{line.weights}, and \code{rotation$codes}.
#' @param params A list with the following named elements:
#'   \describe{
#'     \item{\code{x_var}}{Required. A \code{data.frame} (or character vector of
#'       column names in \code{enaset$meta.data}) whose first column is the
#'       target variable. Additional columns are treated as covariates and
#'       penalized via Lasso.}
#'     \item{\code{y_var}}{Optional. Same format as \code{x_var}. When provided
#'       a second GMR axis is computed.}
#'     \item{\code{select_2_groups}}{Optional length-2 list/vector of group
#'       labels. When given, the GMR fit for the x axis uses only rows whose
#'       target value is in these two groups. The group mean difference for x1
#'       (the secondary axis that keeps group means on the x-axis) is always
#'       computed from the full data.}
#'     \item{\code{interactions}}{Logical; if \code{TRUE} (default) pairwise
#'       interaction terms are added to the model matrix when covariates are
#'       present. Set \code{FALSE} for main-effects-only Lasso.}
#'   }
#'
#' @importFrom libqe generalized_means_rotation
#' @importFrom stats model.matrix as.formula
#' @export
#' @return A list with \code{rotation} (q x q matrix, column names GMR1,
#'   GMR2|SVD2, SVD3, …), \code{codes}, \code{eigenvalues}, and
#'   \code{node.positions = NULL}, suitable for use inside \code{rotate()}.
###
ena.rotate.by.generalized <- function(enaset, params) {

  ## -- Input validation -------------------------------------------------------
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  ## -- Local helpers ----------------------------------------------------------
  ## Encode a target column: numeric targets pass through, anything else becomes
  ## 0-based integer codes. `leading`, when given, fixes which levels come first.
  encode_target <- function(values, leading = NULL) {
    if (is.numeric(values)) {
      return(list(enc = as.numeric(values), categorical = FALSE, n_groups = 0L))
    }
    level_order <- unique(values)
    if (!is.null(leading)) {
      level_order <- c(leading, setdiff(level_order, leading))
    }
    list(
      enc         = as.numeric(factor(values, levels = level_order)) - 1.0,
      categorical = TRUE,
      n_groups    = as.integer(length(level_order))
    )
  }

  ## 0-based indices of the model-matrix columns that belong to the target's
  ## main effect (names starting with the target name and containing no ":").
  ## Falls back to column 0 when nothing matches.
  target_columns <- function(design, target_name) {
    escaped <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", target_name)
    pattern <- paste0("^", escaped, "[^:]*$")
    hits    <- as.integer(grep(pattern, colnames(design)) - 1L)
    if (length(hits) == 0L) 0L else hits
  }

  ## -- Resolve x_var to a data.frame ------------------------------------------
  x_spec <- params$x_var
  if (is.data.frame(x_spec)) {
    x <- x_spec
  } else if (all(x_spec %in% colnames(enaset$meta.data))) {
    x <- enaset$meta.data[, x_spec, with = FALSE]
  } else {
    stop(paste("x_var incorrect:", paste(x_spec, collapse = ", ")))
  }

  ## -- ENA point matrix -------------------------------------------------------
  point_source <- enaset$points.normed.centered
  if (is.null(point_source)) {
    point_source <- enaset$model$points.for.projection
  }
  V <- as.matrix(point_source)

  ## -- Target variable & encoding ---------------------------------------------
  ## With a valid select_2_groups, the two selected groups are placed first so
  ## they are encoded as 0 and 1.
  grp         <- params$select_2_groups
  two_groups  <- !is.null(grp) && length(grp) == 2
  target_full <- as.vector(x[[1]])
  x_info <- encode_target(
    target_full,
    leading = if (two_groups) c(grp[[1]], grp[[2]]) else NULL
  )

  ## -- Row subset (0-based indices of rows in the two selected groups) --------
  x_subset <- integer(0)
  if (two_groups) {
    matching_rows <- which(target_full %in% grp)
    if (length(matching_rows) < 2L) {
      warning("select_2_groups produced < 2 matching rows; using all rows")
    } else {
      x_subset <- as.integer(matching_rows - 1L)
    }
  }

  ## -- Model matrix for x -----------------------------------------------------
  ## Pairwise interactions are included by default when covariates are present;
  ## the intercept column is dropped.
  interactions <- isTRUE(
    if (is.null(params$interactions)) TRUE else params$interactions
  )
  fstr_x  <- if (ncol(x) > 1L && interactions) "~ .^2" else "~ ."
  mm_x    <- model.matrix(as.formula(fstr_x), data = x)[, -1L, drop = FALSE]
  x1_cols <- target_columns(mm_x, colnames(x)[1L])

  ## -- Y axis -----------------------------------------------------------------
  has_y <- !is.null(params$y_var)

  if (has_y) {
    y_spec <- params$y_var
    if (is.data.frame(y_spec)) {
      y <- y_spec
    } else if (all(y_spec %in% colnames(enaset$meta.data))) {
      y <- enaset$meta.data[, y_spec, with = FALSE]
    } else {
      stop("y_var must be a data.frame or a column name in enaset$meta.data")
    }
    y_info  <- encode_target(as.vector(y[[1]]))
    fstr_y  <- if (ncol(y) > 1L && interactions) "~ .^2" else "~ ."
    mm_y    <- model.matrix(as.formula(fstr_y), data = y)[, -1L, drop = FALSE]
    y1_cols <- target_columns(mm_y, colnames(y)[1L])
  } else {
    ## Placeholder y inputs, supplied only so every argument is present.
    y_info  <- list(enc = numeric(nrow(V)), categorical = FALSE, n_groups = 0L)
    mm_y    <- matrix(0.0, nrow(V), 1L)
    y1_cols <- 0L
  }

  ## -- Delegate to libqe ------------------------------------------------------
  result <- libqe::generalized_means_rotation(
    V              = V,
    x_model_matrix = mm_x,
    x_target       = x_info$enc,
    x1_cols        = x1_cols,
    x_categorical  = x_info$categorical,
    x_n_groups     = x_info$n_groups,
    x_subset       = x_subset,
    has_y          = has_y,
    y_model_matrix = mm_y,
    y_target       = y_info$enc,
    y1_cols        = y1_cols,
    y_categorical  = y_info$categorical,
    y_n_groups     = y_info$n_groups
  )

  ## -- Assemble rotation matrix -----------------------------------------------
  rotation <- result$rotation
  colnames(rotation) <- result$column_names
  rownames(rotation) <- colnames(as.matrix(enaset$line.weights))

  list(
    node.positions = NULL,
    rotation       = rotation,
    codes          = enaset$rotation$codes,
    eigenvalues    = result$eigenvalues
  )
}

#' Find metadata columns
#'
#' Flags every column of `x` that is of class `ena.metadata`.
#'
#' @param x data.table (or frame) to search for columns of class ena.metadata
#'
#' @return logical vector with one element per column of `x` (named after the
#'   columns); a zero-length logical vector when `x` has no columns
#' @export
find_meta_cols <- function(x) {
   # vapply() (unlike sapply()) always yields a logical vector, even for a
   # zero-column input, so the result can safely be negated or used as an index.
   vapply(x, is, logical(1), class2 = "ena.metadata")
}

#' Find code columns
#'
#' A column counts as a code column when its name matches the pattern
#' `adjacency.code` or when it is of class `ena.co.occurrence`.
#'
#' @param x data.table (or frame) to search for columns of class ena.co.occurrence
#'
#' @return logical vector with one element per column of `x`; a zero-length
#'   logical vector when `x` has no columns
#' @export
find_code_cols <- function(x) {
   by_name <- grepl("adjacency.code", x = names(x))
   # vapply() keeps the class test logical even for a zero-column input;
   # sapply() would return an empty list there and `|` would fail on it.
   by_class <- vapply(x, is, logical(1), class2 = "ena.co.occurrence")
   by_name | by_class
}

#' Find Binary Columns
#'
#' Identifies columns in a data.frame or data.table that are binary (i.e.,
#' numeric columns whose values are all 0 or 1), optionally including logical
#' columns.
#'
#' @param x A data.frame or data.table to search for binary columns.
#' @param include_logical Logical. If TRUE, logical columns are also considered binary. Default is FALSE.
#'
#' @return A character vector of column names that are binary, or NULL if none are found.
#' @export
#'
#' @examples
#' df <- data.frame(a = c(0, 1, 1), b = c(TRUE, FALSE, TRUE), c = c(1, 2, 3))
#' find_binary_cols(df)
#' find_binary_cols(df, include_logical = TRUE)
find_binary_cols <- function(x, include_logical = FALSE) {
   # vapply() keeps the result logical even when `x` has no columns
   # (sapply() would return an empty list, which is not a valid index).
   is_binary <- vapply(
      x, is_binary_col, logical(1), include_logical = include_logical
   )
   # which() ignores NA test results, so a column whose test is undetermined
   # is left out instead of contributing an NA column name.
   nm <- colnames(x)[which(is_binary)]
   if(length(nm) > 0) nm else NULL
}

#' Find dimension columns
#'
#' Flags every column of `x` that is of class `ena.dimension`.
#'
#' @param x data.table (or frame) to search for columns of class ena.dimension
#'
#' @return logical vector with one element per column of `x` (named after the
#'   columns); a zero-length logical vector when `x` has no columns
#' @export
find_dimension_cols <- function(x) {
   # vapply() guarantees a logical result even for a zero-column input,
   # where sapply() would hand back an empty list.
   vapply(x, is, logical(1), class2 = "ena.dimension")
}

#' Remove meta columns from data.table
#'
#' @param x data.table (or frame) whose columns of class ena.metadata should
#'   be dropped
#'
#' @return data.frame with the columns of class ena.metadata removed
#' @export
remove_meta_data <- function(x) {
   # drop = FALSE keeps the result a data.frame even if one column remains.
   as.data.frame(x)[, !find_meta_cols(x), drop = FALSE]
}

#' Extract metadata easily
#'
#' `$` method for vectors of class `ena.metadata`. It does not index `x`
#' itself: it re-reads the text of the calling expression, takes the first two
#' `$`-separated components of the left-hand side (an object name and an
#' element name), fetches that object from the caller's frame and returns the
#' rows of the named element for which `x == i`.
#'
#' @param x vector of class ena.metadata (the left-hand side of `$`)
#' @param i value compared against `x` (the right-hand side of `$`)
#'
#' @return the rows of the looked-up element for which `x == i`
#' @export
"$.ena.metadata" <- function(x, i) {
   # Text of the expression `$` was applied to, split on "$"; only the first
   # two pieces are kept.
   # NOTE(review): presumably written for calls shaped like
   # set$meta.data$Column$value, giving "set" and "meta.data" — confirm.
   parts <- unlist(strsplit(
               x = as.character(sys.call())[2], split = "\\$"
            ))[1:2]

   # Look the first piece up by name in the caller's frame, then keep the rows
   # of its second-piece element where this column equals `i`.
   set <- get(parts[1], envir = parent.frame())
   m <- set[[parts[2]]][x == i, ]
   m
}

#' Extract line.weights easily
#'
#' `$` method for `line.weights` objects: returns the column of `x` whose
#' name is exactly `i`.
#'
#' @param x object with column names, e.g. a table of line weights
#' @param i name of the column to extract
#'
#' @return the contents of the matching column
#' @export
"$.line.weights" <- function (x, i) {
   col_position <- which(colnames(x) == i)
   x[[col_position]]
}

#' Extract points easily
#'
#' `$` method for `ena.points` objects: returns the column of `x` whose name
#' is exactly `i`.
#'
#' @param x object with column names, e.g. a table of points
#' @param i name of the column to extract
#'
#' @return the contents of the matching column
#' @export
"$.ena.points" <- function (x, i) {
   col_position <- which(colnames(x) == i)
   x[[col_position]]
}

#' Extract a column from an ena.matrix by name
#'
#' `$` method for `ena.matrix` objects: returns the column of `x` whose name
#' is exactly `i`.
#'
#' @param x object with column names, e.g. an ena.matrix
#' @param i name of the column to extract
#'
#' @return the contents of the matching column
#' @export
"$.ena.matrix" <- function (x, i) {
   col_position <- which(colnames(x) == i)
   x[[col_position]]
}

#' Multiply ena.matrix objects
#'
#' Element-wise multiplication of dimension columns in an ena.matrix by another ena.matrix or numeric matrix.
#' If e2 is an ena.matrix, it is converted to a standard matrix before multiplication.
#' The multiplication is applied only to the dimension columns of e1, while other columns remain
#' unchanged. Dimension columns of e1 are paired with the columns of e2 by
#' position.
#'
#' @param e1 An ena.matrix object whose dimension columns will be multiplied.
#' @param e2 An ena.matrix or numeric matrix to multiply with the dimension columns of
#' e1.
#'
#' @return An ena.matrix object with the dimension columns of e1 multiplied by e2.
#' @exportS3Method "*" ena.matrix
"*.ena.matrix" <- function (e1, e2) {
   # Plain-matrix view of the multiplier; non-ena.matrix input is used as is.
   e2m <- e2
   if(is(e2, "ena.matrix")) {
      e2m <- as.matrix(e2)
   }
   
   # Names of the columns of e1 flagged by find_dimension_cols().
   dim_cols <- colnames(e1)[find_dimension_cols(e1)]
   # Map() walks the selected columns of e1 and the columns of e2m in parallel
   # and multiplies each pair.
   # NOTE(review): `:=` assumes e1 is a data.table and would then update it by
   # reference — confirm that callers expect e1 itself to change.
   e1[, (dim_cols) := Map(function(col, mult) col * mult, .SD, as.data.frame(e2m)), .SDcols = dim_cols]
}

# "$.ena.plot" <- function(x, i) {
#  browser()
# }
# "[[.ena.plot" <- function(x, i) {
#  browser()
# }
#' @export
.DollarNames.ena.metadata <- function(x, pattern = "") {
   # Completion candidates are the distinct values held in the metadata
   # vector, offered as character strings.
   candidates <- as.character(unique(x))
   # Honour the generic's contract: only candidates matching `pattern` are
   # returned. The default pattern "" matches every (non-NA) candidate.
   grep(pattern, candidates, value = TRUE)
}

# "[.ena.matrix" = function(x, ...)
# {
#    browser()
#    original.class = class(x)[1]
#    class(x) = class(x)[-1]
#    x = x[...]
#
# #   y = as.data.frame(x)
# }

#' @export
summary.ena.set <- function(object, ...) {
   # Labels for the two dimensions every section below reports on.
   dim_labels <- paste("Dimension", 1:2)
   dim_header <- paste(dim_labels, collapse = "\t")

   # Prints one titled section: the dimension header row followed by the
   # first two entries of `values`, rounded to three decimals.
   report_pair <- function(title, values) {
      cat(title)
      cat("\t", dim_header, "\n")
      cat("\t", paste(round(values[1:2], 3), collapse = "\t\t"), "\n\n")
   }

   cat("Units: ", nrow(object$points), "\t\t")
   cat("Codes: ", length(object$rotation$codes), "\n")

   report_pair("Variance: \n", object$model$variance)
   report_pair("Eigenvalues: \n", object$rotation$eigenvalues)

   cat("Correlations: \n")
   cors <- ena.correlations(object)
   rownames(cors) <- dim_labels
   print(cors)
}
# as.data.frame.ena.connections <- function(x) {
#   class(x) = class(x)[-1]
#   y = as.data.frame(x)
#   y
# }
# format.co.occurrence = format.metadata = function(x, justify = "none") {
#   y = as.character(x)
#   format(y, justify = justify)
# }

#' Print an ena.set
#'
#' Prints the unclassed contents of the set. When the set carries a
#' `_plot_op` element equal to TRUE only its `plots` element is printed;
#' otherwise the whole set is printed, without `plots` unless `plot` is set.
#'
#' @param x ena.set to print
#' @param ... not used by this method
#' @param plot logical; if FALSE (default) the `plots` element is removed
#'   before printing
#' @param set not used by this method
#'
#' @return `x`, invisibly
#' @export
print.ena.set <- function(x, ..., plot = FALSE, set = TRUE) {
   contents <- unclass(x)
   plot_flag <- contents$`_plot_op`

   if (!is.null(plot_flag) && plot_flag == TRUE) {
      base::print(contents$plots)
   } else {
      if (plot == FALSE) {
         contents$plots <- NULL
      }
      base::print(contents)
   }

   invisible(x)
}

#' Re-run a model as a trajectory model
#'
#' Calls `ena()` again with the parameters stored on `x` (in
#' `x$"_function.params"`), replacing `model` with the requested trajectory
#' model and overriding any stored parameter that is also supplied through
#' `...`.
#'
#' @param x ena.set holding the original call parameters in `_function.params`
#' @param by not used by the function body
#' @param model trajectory model to build: "AccumulatedTrajectory" (default)
#'   or "SeparateTrajectory". The legacy spelling "SeperateTrajectory" is
#'   still accepted and is forwarded as "SeparateTrajectory".
#' @param ... named arguments that replace (or extend) the stored parameters
#'
#' @return the value returned by `ena()`
#' @export
as_trajectory <- function(x,
   by = x$`_function.params`$conversation[1],
   model = c("AccumulatedTrajectory", "SeperateTrajectory"),
   ...
) {
   # Only the first entry is considered, which is also what match.arg() does
   # with the untouched default vector.
   model <- model[1L]
   # The historical choice list misspells "Separate", while the accumulation
   # code matches against "SeparateTrajectory". Map the legacy spelling (and
   # its abbreviations) onto the correct name before validating.
   if (is.character(model) && length(model) == 1L &&
       !is.na(pmatch(model, "SeperateTrajectory"))) {
      model <- "SeparateTrajectory"
   }
   model <- match.arg(model, c("AccumulatedTrajectory", "SeparateTrajectory"))

   orig_args <- x$`_function.params`
   orig_args$model <- model

   # Arguments given through `...` take precedence over the stored ones.
   more_args <- list(...)
   for(arg in names(more_args)) {
      orig_args[[arg]] <- more_args[[arg]]
   }

   do.call(ena, orig_args)
}

#' Project a set into another set's rotation
#'
#' Copies the rotation matrix, nodes and eigenvalues from `by` onto `x` and
#' recomputes `x$points` by multiplying `x$model$points.for.projection` with
#' that rotation matrix. Both sets must have identical adjacency keys.
#'
#' @param x ena.set to be projected
#' @param by ena.set (its `rotation` element is used) or ena.rotation.set
#'   supplying the rotation
#' @param ... not used by the function body
#'
#' @return `x` with updated rotation and points, returned invisibly
#' @export
project_in <- function(x, by = NULL, ...) {
   if(is.null(by)) {
      stop("A second parameter (ena.set or rotation.set) is required")
   }

   rotation.set <- NULL
   if(is(by, "ena.set")) {
      rotation.set <- by$rotation
   } else if(is(by, "ena.rotation.set")) {
      rotation.set <- by
   }

   # Fail early with a clear message instead of carrying a NULL rotation into
   # the adjacency-key comparison and the matrix product below.
   if(is.null(rotation.set)) {
      stop("`by` must be an ena.set or ena.rotation.set that provides a rotation")
   }

   if(!identical(x$rotation$adjacency.key, rotation.set$adjacency.key)) {
      stop("Rotation sets must have identical adjacency keys")
   }

   x$rotation.matrix <- rotation.set$rotation.matrix
   x$rotation$rotation.matrix <- rotation.set$rotation.matrix
   x$rotation$nodes <- rotation.set$nodes
   x$rotation$eigenvalues <- rotation.set$eigenvalues

   points <- as.matrix(x$model$points.for.projection) %*% as.matrix(x$rotation.matrix)
   points.dt <- as.data.table(points)
   # seq_len() is safe when the projection has no columns (seq(0) is c(1, 0)).
   for (i in seq_len(ncol(points.dt))) {
    set(points.dt, j = i, value = as.ena.dimension(points.dt[[i]]))
   }
   # Trajectory models carry their identifying columns in x$trajectories,
   # all other models in x$meta.data.
   if(grepl(x = x$model$model.type, pattern = "Trajectory")) {
    x$points <- cbind(x$trajectories, points.dt)
   } else {
    x$points <- cbind(x$meta.data, points.dt)
   }
   x$points <- as.ena.matrix(x$points, "ena.points")

   .return(x, invisible = TRUE)
}

#' Re-run a model with a means rotation
#'
#' Calls `ena()` again with the parameters stored on `x`, adding `groupVar`
#' and `groups` so that the model is rotated on two groups. Without `on`, the
#' grouping variable is the unit column with the fewest distinct values in
#' `x$model$raw.input` and the groups are its first two values (or factor
#' levels).
#'
#' @param x ena.set holding the original call parameters in `_function.params`
#' @param on optional named list or vector; its name gives the grouping
#'   variable and its content the groups to rotate on
#'
#' @return the new ena.set (with the plots of `x` carried over), invisibly
#' @export
means_rotate <- function(x, on = NULL) {
   groupVar <- NULL
   groups <- NULL
   if(is.null(on)) {
      unit_cols <- x$`_function.params`$units
      raw_input <- x$model$raw.input
      if(length(unit_cols) > 0 && all(unit_cols %in% colnames(raw_input))) {
         # Number of distinct values in each unit column.
         col_counts <- vapply(unit_cols, function(u) {
                  length(unique(raw_input[[u]]))
               }, integer(1))
         # which.min() selects the column with the fewest distinct values.
         # The former `order(col_counts) == 1` test only did so for one or
         # two unit columns and could select a different column with more.
         groupVar <- unit_cols[which.min(col_counts)]
         group_vars <- unique(raw_input[[groupVar]])
         if(!is.null(levels(group_vars))) {
           groups <- levels(group_vars)[1:2]
         }
         else {
           groups <- group_vars[1:2]
         }
      }
   } else if(!is.null(names(on))) {
      groupVar <- names(on)
      groups <- on[[groupVar]]
   }

   if(is.null(groupVar) || is.null(groups)) {
      stop("Unable to determine groups for rotation.")
   }

   orig_args <- x$`_function.params`
   orig_args$groupVar <- groupVar
   orig_args$groups <- groups
   new_set <- do.call(ena, orig_args)
   new_set$plots <- x$plots
   invisible(new_set)
}

# Internal helper used to hand a set back to the caller.
#
# Stores `from_plot` on the set as `_plot_op` (print.ena.set() reads that
# element to decide whether to print only the plots) and returns the set,
# invisibly unless `invisible` is FALSE.
.return <- function(x, invisible = TRUE, from_plot = FALSE) {
   x$`_plot_op` <- from_plot

   if(invisible == TRUE) {
      invisible(x)
   }
   else {
      x
   }
}

# TRUE when `col` is a logical vector in which every value is TRUE or FALSE.
# Always returns a single TRUE/FALSE: a logical vector containing NA yields
# FALSE (not NA), and non-logical input is rejected before any comparison.
is_logical_col <- function(col) {
  # For a logical vector "every value is TRUE or FALSE" means "no NA".
  is.logical(col) && !anyNA(col)
}

# TRUE when `col` is a numeric vector whose values are all exactly 0 or 1.
# With include_logical = TRUE a column accepted by is_logical_col() also
# counts as binary. Always returns a single TRUE/FALSE: a numeric column
# containing NA yields FALSE (not NA).
is_binary_col <- function(col, include_logical = TRUE) {
  # Values equal to 0 or 1 are necessarily whole numbers, so no separate
  # whole-number test is needed.
  is_col <- is.numeric(col) && !anyNA(col) && all(col == 0 | col == 1)

  if(!is_col && isTRUE(include_logical)) {
    # isTRUE() keeps the result a strict TRUE/FALSE whatever the helper returns.
    is_col <- isTRUE(is_logical_col(col))
  }

  is_col
}

# Element-wise test of whether `x` lies within `tol` of its rounded value.
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  distance_to_rounded <- abs(x - round(x))
  distance_to_rounded < tol
}


# Extract points easily (disabled: the `[.ena.matrix` method below is commented out)
#
# @param x [TBD]
# @param i [TBD]
# @param j [TBD]
# @param ... Passed to `[.data.table`
# @param with.meta logical, currently defaults to TRUE, which includes the metadata columns.
#
# @return [TBD]
# @export
# "[.ena.matrix" <- function (x, i, j, by, keyby, ..., with.meta = TRUE) {
#   orig.class <- class(x)
#   x.unclass <- data.table::as.data.table(unclass(x))
#
#   if(with.meta == FALSE) {
#     x.nometa <- x.unclass[, !find_meta_cols(x.unclass), with = F]
#     x_ <- x.nometa[i, ..j, ...]
#   }
#   else {
#     x_ <- x.unclass[i, j, by = by, keyby = keyby, ...]
#     # if (!is.null(j)) {
#     #   x_ <- x_[, ..j]
#     # }
#   }
#   class(x_) <- orig.class
#   x_
# }


##
#' @title Accumulate data from a data frame into a set of adjacency (co-occurrence) vectors
#'
#' @description This function initializes an ENAdata object, processing conversations from coded data to generate adjacency (co-occurrence) vectors
#'
#' @details ENAData objects are created using this function. This accumulation receives
#' separate data frames for units, codes, conversation, and optionally, metadata. It
#' iterates through the data to create an adjacency (co-occurrence) vector corresponding
#' to each unit - or in a trajectory model multiple adjacency (co-occurrence) vectors for
#' each unit.
#'
#' In the default MovingStanzaWindow model, co-occurrences between codes are
#' calculated for each line k in the data between line k and the window.size.back-1 previous
#' lines and window.size.forward-1 subsequent lines in the same conversation as line k.
#'
#' In the Conversation model, co-occurrences between codes are calculated across all lines in
#' each conversation. Adjacency (co-occurrence) vectors are constructed for each unit u by
#' summing the co-occurrences for the lines that correspond to u.
#'
#' Options for how the data is accumulated are endpoint, which produces one adjacency (co-occurrence)
#' vector for each unit by summing the co-occurrences for all lines, and two trajectory models:
#' AccumulatedTrajectory and SeparateTrajectory. Trajectory models produce an adjacency
#' (co-occurrence) model for each conversation for each unit. In a SeparateTrajectory model,
#' each conversation is modeled as a separate network. In an AccumulatedTrajectory model, the
#' adjacency (co-occurrence) vector for the current conversation includes the co-occurrences
#' from all previous conversations in the data.
#'
#' @export
#'
#' @param units A data frame where the columns are the properties by which units will be identified
#' @param conversation A data frame where the columns are the properties by which conversations will be identified
#' @param codes A data frame where the columns are the codes used to create adjacency (co-occurrence) vectors
#' @param metadata (optional) A data frame with additional columns of metadata to be associated with each unit in the data. It is only attached when it has the same number of rows as the other data frames
#' @param model A character, choices: EndPoint (or E), AccumulatedTrajectory (or A), or SeparateTrajectory (or S); default: EndPoint. Determines the ENA model to be constructed
#' @param weight.by (optional) A function to apply to values after accumulation
#' @param mask (optional) A binary matrix of size ncol(codes) x ncol(codes). 0s in the mask matrix row i column j indicates that co-occurrence will not be modeled between code i and code j
#' @param window A character, choices are Conversation (or C), MovingStanzaWindow (MSW, MS); default MovingStanzaWindow. Determines how stanzas are constructed, which defines how co-occurrences are modeled
#' @param window.size.back A positive integer, Inf, or character (INF or Infinite), default: 1. Determines, for each line in the data frame, the number of previous lines in a conversation to include in the stanza window, which defines how co-occurrences are modeled
#' @param window.size.forward (optional) A positive integer, Inf, or character (INF or Infinite), default: 0. Determines, for each line in the data frame, the number of subsequent lines in a conversation to include in the stanza window, which defines how co-occurrences are modeled
#' @param ... additional parameters addressed in inner function
#' @param include.meta Logical indicating if unit metadata should be attached to the resulting ENAdata object, default is TRUE
#' @param as.list R6 objects will be deprecated, but if this is TRUE, the original R6 object will be converted to and returned as a list with class `ena.set`; otherwise the R6 object itself is returned (with a deprecation warning)
#'
#' @seealso \code{\link{ENAdata}}, \code{\link{ena.make.set}}
#'
#' @return \code{\link{ENAdata}} object with data [adjacency (co-occurrence) vectors] accumulated from the provided data frames.
#'
##
# NOTE(review): ena.set() searches the call stack for a frame that contains a
# variable named `window.size.back` and stores every variable of that frame as
# `_function.params`. The names of this function's arguments and local
# variables can therefore end up in the returned set; rename or add locals
# with care.
ena.accumulate.data <- function(
  units = NULL,
  conversation = NULL,
  codes = NULL,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  mask = NULL,
  include.meta = T,
  as.list = T,
  ...
) {
  # The three core inputs are mandatory and must describe the same lines.
  if (is.null(units) || is.null(conversation) || is.null(codes)) {
    stop("Accumulation requires: units, conversation, and codes");
  }
  if (nrow(units) != nrow(conversation) || nrow(conversation) != nrow(codes)) {
    stop("Data Frames do not have the same number of rows");
  }

  # One wide table: unit columns, conversation columns, then code columns.
  df <- cbind(units, conversation);
  df <- cbind(df, codes);

  # Metadata is appended only when it lines up row-for-row with the data;
  # otherwise it is left out without any message.
  # NOTE(review): after as.data.table() `metadata` is presumably never NULL,
  # which would make the is.null() test below redundant — confirm.
  metadata <- data.table::as.data.table(metadata)
  if (!is.null(metadata) && nrow(metadata) == nrow(df)) {
    df <- cbind(df, metadata);
  }

  model <- match.arg(model)
  window <- match.arg(window)

  units.by <- colnames(units);
  conversations.by <- colnames(conversation);
  if (identical(window, "Conversation")) {
    # Whole-conversation window: conversations are additionally split by the
    # unit columns and the window name itself is passed on as the back size.
    conversations.by <- c(conversations.by, units.by);
    window.size.back <- window;
  }
  else if (identical(window, "MovingStanzaWindow")) {
    # Any window size whose text contains "inf" (case-insensitive) becomes Inf.
    if( grepl(pattern = "inf", x = window.size.back, ignore.case = T)) {
      window.size.back <- Inf
    }
    if( grepl(pattern = "inf", x = window.size.forward, ignore.case = T)) {
      window.size.forward <- Inf
    }
  }

  data <- ENAdata$new(
    file = df,
    units = units,
    units.by = units.by,
    conversations.by = conversations.by,
    codes = codes,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    mask = mask,
    include.meta = include.meta,
    ...
  );
  data$process()

  # Keep the call that produced this accumulation on the object.
  data$function.call <- sys.call()

  if(as.list) {
    # Convert the R6 object into the list-based ena.set representation.
    data <- ena.set(data)
  } else {
    warning(paste0("Usage of R6 data objects is deprecated and may be removed ",
      "entirely in a future version. Consider upgrading to the new data ",
      "object."))
  }

  data
}

#' Connection counts as square matrix
#'
#' Takes the connection counts of an ena.set (or connection counts passed
#' directly) and converts them with `as.matrix(..., square = TRUE)`. A single
#' row is simplified; for several rows a list result is named by `ENA_UNIT`.
#'
#' @param x ena.set or ena.connections (i.e. set$connection.counts)
#'
#' @return matrix
#' @export
connection.matrix <- function(x) {
  connections <- if (is(x, "ena.set")) x$connection.counts else x

  if (!is(connections, "ena.connections")) {
    stop("Unable to find connections. `x` must be connections from an ena.set or an ena.set")
  }

  # Only a single row of connections is collapsed to one matrix.
  single_row <- nrow(connections) == 1
  cm <- as.matrix(connections, square = TRUE, simplify = single_row)
  if (!single_row && is.list(cm)) {
    names(cm) <- connections$ENA_UNIT
  }

  cm
}

###
#' Generate a methods writeup
#'
#' @description Renders the `methods.rmd` template shipped with rENA through
#' `rmarkdown::render()` and returns either the rendered file or, for
#' `type = "stream"`, the text of the rendered output.
#'
#' @details `render()` is called without an `envir` argument.
#' NOTE(review): rmarkdown documents `envir = parent.frame()` as the default,
#' so the template is presumably evaluated with access to this function's
#' arguments (`enaset`, `tool`, `comparison`, ...), several of which are not
#' referenced in the function body itself — confirm against `methods.rmd`.
#'
#' @param enaset ENAset to view methods of
#' @param tool c("rENA","webENA")
#' @param tool.version as.character(packageVersion(tool))
#' @param comparison character string representing the comparison used, c(NULL, "parametric", "non-parametric"). Default NULL
#' @param comparison.groups Groups that were used for the comparison
#' @param sig.dig Integer for the number of digits to round to
#' @param output_dir Where to save the output file; also used as the knit root and intermediates directory
#' @param type c("file","stream") File will save to a file in output_dir, Stream returns the contents directly
#' @param theory Logical indicating whether to include theory in the writeup
#' @param methods Logical indicating whether to include methods in the writeup
#' @param params additional parameters for rmarkdown::render
#' @param output_file character, passed to rmarkdown::render
#' @param output_format character, passed to rmarkdown::render; replaced by "word_document" when `type = "file"`
#'
#' @export
#'
#' @return For `type = "file"`, the value returned by `rmarkdown::render()`.
#' For `type = "stream"`, the contents of the rendered file as a single string
#' when its name ends in ".plain", otherwise `NULL` (invisibly).
ena.writeup <- function(
  enaset,
  tool = "rENA", tool.version = as.character(packageVersion(tool)),
  comparison = NULL, comparison.groups = NULL, sig.dig = 2,
  output_dir = getwd(), type = c("file","stream"), theory = T, methods = T,
  params = NULL, output_file = NULL, output_format = NULL
) {
  # Fall back to the weight.by stored under `args` when the top-level entry
  # is missing (only the local copy of `enaset` is changed).
  if(is.null(enaset$`_function.params`$weight.by))
    enaset$`_function.params`$weight.by <- enaset$`_function.params`$args$weight.by

  type = match.arg(type, choices = c("file","stream"), several.ok = FALSE)

  # File output is always rendered as a Word document, overriding any
  # output_format supplied by the caller.
  if(type == "file") {
    output_format = "word_document"
  }

  file = rmarkdown::render(system.file("rmd","methods.rmd", package="rENA"), output_dir = output_dir,
                    knit_root_dir = output_dir, intermediates_dir = output_dir, quiet = TRUE,
                    params = params, output_file = output_file
                    ,output_format = output_format
                    # ,output_format = ifelse(type == "file", rENA::methods_report, rENA::methods_report_stream)
                  )

  # "file": hand back what render() returned; "stream": read the rendered
  # text back in, but only when the output file name ends in ".plain".
  if(type == "file")
    file
  else if (type == "stream" && endsWith(file, ".plain"))
    readChar(file, file.info(file)$size)
}

#' @title methods_report
#' @description Methods report for rmarkdown. Builds an rmarkdown output
#'   format that sends the knitted document through pandoc with `to = "docx"`.
#' @param toc passed to `rmarkdown::pandoc_toc_args()`
#' @param toc_depth passed to `rmarkdown::pandoc_toc_args()`
#' @param fig_width knitr chunk option `fig.width`
#' @param fig_height knitr chunk option `fig.height`
#' @param keep_md passed to `rmarkdown::output_format()`
#' @param md_extensions passed to `rmarkdown::from_rmarkdown()` as `extensions`
#' @param pandoc_args additional pandoc arguments, appended after the
#'   standalone and table-of-contents arguments
#'
#' @return the value of `rmarkdown::output_format()`
#' @export
methods_report <- function(toc = FALSE,
                          toc_depth = 3,
                          fig_width = 5,
                          fig_height = 4,
                          keep_md = FALSE,
                          md_extensions = NULL,
                          pandoc_args = NULL) {

  # knitr options: PNG figures at 96 dpi with the requested size
  knitr <- rmarkdown::knitr_options(
    opts_chunk = list(dev = 'png',
                      dpi = 96,
                      fig.width = fig_width,
                      fig.height = fig_height)
  )

  # pandoc args: standalone output, then the table-of-contents arguments,
  # then whatever the caller supplied
  args <- c(
    "--standalone",
    rmarkdown::pandoc_toc_args(toc, toc_depth),
    pandoc_args
  )

  # return output format
  rmarkdown::output_format(
    knitr = knitr,
    pandoc = rmarkdown::pandoc_options(to = "docx",
                            from = rmarkdown::from_rmarkdown(extensions = md_extensions),
                            args = args),
    keep_md = keep_md
  )
}

#' @title methods_report_stream
#' @description Methods report for rmarkdown. Builds an rmarkdown output
#'   format that sends the knitted document through pandoc with `to = "plain"`.
#' @param toc passed to `rmarkdown::pandoc_toc_args()`
#' @param toc_depth passed to `rmarkdown::pandoc_toc_args()`
#' @param fig_width knitr chunk option `fig.width`
#' @param fig_height knitr chunk option `fig.height`
#' @param keep_md passed to `rmarkdown::output_format()`
#' @param md_extensions passed to `rmarkdown::from_rmarkdown()` as `extensions`
#' @param pandoc_args additional pandoc arguments, appended after the
#'   standalone and table-of-contents arguments
#'
#' @return the value of `rmarkdown::output_format()`
#' @export
methods_report_stream <- function(toc = FALSE,
                          toc_depth = 3,
                          fig_width = 5,
                          fig_height = 4,
                          keep_md = FALSE,
                          md_extensions = NULL,
                          pandoc_args = NULL) {

  # knitr options: PNG figures at 96 dpi with the requested size
  knitr <- rmarkdown::knitr_options(
    opts_chunk = list(dev = 'png',
                      dpi = 96,
                      fig.width = fig_width,
                      fig.height = fig_height)
  )

  # pandoc args: standalone output, then the table-of-contents arguments,
  # then whatever the caller supplied
  args <- c(
    "--standalone",
    rmarkdown::pandoc_toc_args(toc, toc_depth),
    pandoc_args
  )

  # return output format
  rmarkdown::output_format(
    knitr = knitr,
    pandoc = rmarkdown::pandoc_options(to = "plain",
                            from = rmarkdown::from_rmarkdown(extensions = md_extensions),
                            args = args),
    keep_md = keep_md
  )
}

# Build the list-based `ena.set` representation.
#
# `x` is either an ENAdata object (accumulation only) or a list that holds
# the ENAdata object in `x$enadata` together with rotation results
# (`x$line.weights`, `x$points.rotated`, `x$rotation.set`, `x$node.positions`,
# ...). In the first case only the accumulation parts of the set are filled
# in; in the second case line weights, points, rotation and model statistics
# are copied over as well.
#
# Returns a list of class "ena.set".
ena.set <- function(x) {
  newset = list()
  class(newset) <- c("ena.set", class(newset))
  # x.is.set stays TRUE when `x` already carries rotation results; a bare
  # ENAdata object is wrapped so both cases can be read through x$enadata.
  x.is.set <- T
  if("ENAdata" %in% class(x)) {
    x <- list(enadata = x);
    x.is.set <- F
  }
  # Connection names: the entries of each adjacency.matrix column joined
  # with " & ".
  code.columns <- apply(x$enadata$adjacency.matrix, 2, paste, collapse = " & ")

  # Connection counts, renamed to the connection names and tagged column by
  # column as co-occurrence data.
  newset$connection.counts <- x$enadata$adjacency.vectors;
  colnames(newset$connection.counts) <- code.columns
  for (i in seq(ncol(newset$connection.counts))) {
    set(newset$connection.counts, j = i, value = as.ena.co.occurrence(newset$connection.counts[[i]]))
  }

  # Metadata: trajectory models use the trajectory unit table (plus an
  # ENA_UNIT id built by joining the unit columns with "::"), other models
  # use the accumulated metadata.
  if (grepl(x = x$enadata$model, pattern = "Traj", ignore.case = T)) {
    newset$meta.data <- data.table::copy(x$enadata$trajectories$units)
    newset$meta.data[, ENA_UNIT := apply(x$enadata$trajectories$units, 1, paste, collapse = "::")]

    newset$trajectories <- cbind(newset$meta.data, x$enadata$trajectories$step)
    for (i in seq(ncol(newset$trajectories))) {
      set(newset$trajectories, j = i, value = as.ena.metadata(newset$trajectories[[i]]))
    }
  }
  else {
    newset$meta.data <- x$enadata$metadata
  }

  # Tag every metadata column as ena.metadata.
  if (!is.null(newset$meta.data) && ncol(newset$meta.data) > 0) {
    for (i in seq(ncol(newset$meta.data))) {
      set(newset$meta.data, j = i,
          value = as.ena.metadata(newset$meta.data[[i]]))
    }
  }
  newset$meta.data <- as.ena.matrix(newset$meta.data);

  if (x.is.set) {
    # Line weights: metadata columns followed by the weights; every
    # non-metadata column is tagged as co-occurrence data.
    newset$line.weights <- as.data.table(cbind(x$enadata$metadata, x$line.weights))
    to_cols <- names(which(!find_meta_cols(newset$line.weights)))
    for(col in to_cols) {
      set(x = newset$line.weights, j = col, value = as.ena.co.occurrence(newset$line.weights[[col]]))
    }
    class(newset$line.weights) <- c("ena.line.weights", class(newset$line.weights))

    # Points: metadata columns followed by the rotated points; every
    # non-metadata column is tagged as a dimension.
    newset$points <- cbind(x$enadata$metadata, x$points.rotated)
    to_cols <- names(which(!find_meta_cols(newset$points)))
    for(col in to_cols) {
      set(x = newset$points, j = col, value = as.ena.dimension(newset$points[[col]]))
    }
    newset$points <- as.ena.matrix(newset$points, "ena.points")

    newset$rotation.matrix <- x$rotation.set$rotation
  }

  # Prefix the connection counts with the metadata columns.
  newset$connection.counts <- cbind(newset$meta.data, newset$connection.counts)
  class(newset$connection.counts) <- c("ena.connections",
                                        class(newset$connection.counts))

  # Model information available straight from the accumulation.
  # row.connection.counts keeps one column per distinct column name.
  newset$model <- list(
    model.type = x$enadata$model,
    raw.input = x$enadata$raw,
    row.connection.counts = x$enadata$accumulated.adjacency.vectors[,
            unique(names(x$enadata$accumulated.adjacency.vectors)), with = F],
    unit.labels = x$enadata$unit.names
  )

  #####
  # if(quote(x$enadata$function.params$weight.by) != "binary") {
  #   newset$model$unweighted.connection.counts <- x$enadata$adjacency.vectors.raw
  #   class(newset$model$unweighted.connection.counts) <- c("ena.connections",
  #                             class(newset$model$unweighted.connection.counts))
  #   are.codes <- find_code_cols(newset$model$unweighted.connection.counts)
  #   for (i in seq(are.codes)) {
  #     if (are.codes[i]) {
  #       set(newset$model$unweighted.connection.counts, j = i,
  #         value = as.ena.co.occurrence(
  #           newset$model$unweighted.connection.counts[[i]]
  #         )
  #       )
  #     } else {
  #       set(newset$model$unweighted.connection.counts, j = i,
  #         value = as.ena.metadata(
  #           newset$model$unweighted.connection.counts[[i]]
  #         )
  #       )
  #     }
  #   }
  # }
  #####

  # Row-level connection counts: rename the "adjacency.code" columns to the
  # connection names, then tag each column by role (co-occurrence, metadata
  # or code).
  cols <- grep("adjacency.code", colnames(newset$model$row.connection.counts))
  colnames(newset$model$row.connection.counts)[cols] <- code.columns
  for(i in cols) {
    set(newset$model$row.connection.counts, j = i,
        value = as.ena.co.occurrence(newset$model$row.connection.counts[[i]]))
  }
  for (i in which(colnames(newset$model$row.connection.counts)
      %in% colnames(newset$meta.data))
  ) {
    set(newset$model$row.connection.counts, j = i,
          value = as.ena.metadata(newset$model$row.connection.counts[[i]]))
  }
  for (i in which(colnames(newset$model$row.connection.counts) %in%
        x$enadata$codes)
  ) {
    set(newset$model$row.connection.counts, j = i,
          value = as.ena.code(newset$model$row.connection.counts[[i]]))
  }
  class(newset$model$row.connection.counts) <- c("row.connections",
                                      class(newset$model$row.connection.counts))

  # Model statistics that only exist once a rotation has been computed.
  if (x.is.set) {
    newset$model$centroids <- x$centroids
    newset$model$correlations <- x$correlations
    newset$model$function.call <- x$function.call
    newset$model$function.params <- x$function.params
    newset$model$points.for.projection <- cbind(x$enadata$metadata,
                                               x$points.normed.centered)
    newset$model$variance <- x$variance
    names(newset$model$variance) <- colnames(newset$rotation.matrix)
  }

  # Rotation set: always holds the adjacency key and the codes.
  newset$rotation <- list(
    adjacency.key = as.data.table(x$enadata$adjacency.matrix),
    codes = x$enadata$codes
  )
  class(newset$rotation) <- c("ena.rotation.set", class(newset$rotation))

  for (i in seq(ncol(newset$rotation$adjacency.key))) {
    set(newset$rotation$adjacency.key, j = i,
          value = as.ena.codes(newset$rotation$adjacency.key[[i]]))
  }

  if(x.is.set) {
    newset$rotation$eigenvalues = x$rotation.set$eigenvalues
    newset$rotation$nodes = x$node.positions
    newset$rotation$rotation.matrix = x$rotation.set$rotation
  }

  # Record the outermost call on the stack, and the variables of the frame on
  # the call stack that contains a variable named `window.size.back`.
  # NOTE(review): sys.frame() is given which(back.frame) unchanged — confirm
  # that at most one frame on the stack can contain `window.size.back`.
  newset$`_function.call` <- sys.calls()[[1]]
  back.frame <- sapply(sys.frames(), function(f) {
                                  "window.size.back" %in% ls(envir = f) })
  if (any(back.frame)) {
    call.frame <- sys.frame(which(back.frame))
    newset$`_function.params` <- mget(ls(envir = call.frame),
                                                  envir = call.frame)
  } else {
    newset$`_function.params` <- list()
  }

  return(newset);
}

#####
#' @title Wrapper to generate plots of units, groups, and networks
#'
#' @description Plots individual units, all units, groups of units, networks, and network subtractions
#'
#' @details This function includes options to plots individual units, all units,
#' groups of units, networks, and network subtractions, given an ena.set objects. Plots are stored
#' on the supplied ena.set object.
#'
#'
#' @param set an ena.set object
#' @param groupVar vector, character, of column name containing group identifiers.
#' @param groups vector, character, of values of groupVar column you wish to plot. Maxium of two groups allowed.
#' @param points logical, TRUE will plot points (default: FALSE)
#' @param mean logical, TRUE will plot the mean position of the groups defined in the groups argument (default: FALSE)
#' @param network logical, TRUE will plot networks (default: TRUE)
#' @param networkMultiplier numeric, scaling factor for non-subtracted networks (default: 1)
#' @param subtractionMultiplier numeric, scaling factor for subtracted networks (default: 1)
#' @param unit vector, character, name of a single unit to plot
#' @param colors vector, character, of colors for groups or points. For two-group models supply two values (group1, group2); for single-group or no-group models supply one value. Defaults to "blue"/"red" for two groups and "black" otherwise.
#' @param confidence.interval character, style of confidence interval shown on mean points: "box" (default), "crosshairs", or "none"
#' @param print.plots logical, TRUE will show plots in the Viewer (default: FALSE)
#' @param ... Additional parameters passed to set creation and plotting functions
#' @export
#' @return ena.set object
#####
ena.plotter <- function(
  set,
  groupVar = NULL,
  groups = NULL,
  points = FALSE,
  mean = FALSE,
  network = TRUE,
  networkMultiplier = 1,
  subtractionMultiplier = 1,
  unit = NULL,
  colors = NULL,
  confidence.interval = "box",
  print.plots = FALSE,
  ...
) {
  # Dispatches to one of three plot types and appends the result to set$plots:
  #   1. a single unit            (`unit` supplied),
  #   2. a single mean network    (no groupVar, or exactly one group),
  #   3. a two-group subtraction  (two or more groups, via ena.plot.subtraction).
  # `...` is forwarded to ena.plot.subtraction for every two-group plot.
  data <- set$connection.counts

  # i-th user-supplied colour, or `default` when `colors` is NULL, shorter
  # than `i`, or NA at that position (e.g. one colour given for two groups).
  pick_color <- function(i, default) {
    if (length(colors) >= i && !is.na(colors[i])) colors[i] else default
  }

  # ---- 1. Single unit -------------------------------------------------------
  if (!is.null(unit)) {
    # %in% (rather than ==) keeps the row mask free of NA.
    point.row <- set$points$ENA_UNIT %in% unit
    if (!any(point.row)) {
      stop("Unit does not exist!")
    }

    plot <- ena.plot(enaset = set, title = unit)
    point <- as.matrix(set$points)[point.row, ]
    point.lw <- as.matrix(set$line.weights)[point.row, ] * networkMultiplier

    unit.color <- pick_color(1, "black")
    plot <- ena.plot.points(enaplot = plot, points = point, colors = unit.color)
    plot <- ena.plot.network(enaplot = plot, network = point.lw, colors = unit.color)

    set$plots[[length(set$plots) + 1]] <- plot

    if (print.plots == TRUE) {
      print(set$plots[[length(set$plots)]])
    }

    return(set)
  }

  # ---- Decide between a single-network plot and a subtraction plot ----------
  # `single` describes a one-network plot (title, mean label, row mask or NULL
  # for all rows); when it stays NULL, `pair` holds the two groups to subtract.
  single <- NULL
  pair <- NULL

  if (is.null(groupVar)) {
    single <- list(title = "All Units", label = "Mean", rows = NULL)
  } else if (length(groups) == 0) {
    # No groups requested (NULL or empty): derive them from the data.
    unique.groups <- unique(data[[groupVar]])

    if (length(unique.groups) == 1) {
      warning("No groups specified and group variable only contains one unique value. Generating plot for one group.")
      single <- list(
        title = unique.groups,
        label = unique.groups,
        rows = set$points[[groupVar]] %in% unique.groups
      )
    } else {
      pair <- unique.groups[1:2]
      warning(paste0("No groups specified. Generating plots of first two unique values of group variable: ", pair[1], " and ", pair[2]))
    }
  } else if (length(groups) == 1) {
    # %in% never yields NA, so a group column containing NA cannot break the
    # `if` the way `any(col == group) == FALSE` did.
    if (!(groups %in% data[[groupVar]])) {
      stop("Group column does not contain group1 value!")
    }
    single <- list(
      title = groups,
      label = groups,
      rows = set$points[[groupVar]] %in% groups
    )
  } else {
    if (length(groups) > 2) {
      # Collapse to one string: a character vector handed to warning()/stop()
      # is pasted together without separators.
      warning(paste0("More than two groups specified. Plotting the first two groups: ", paste(groups[1:2], collapse = ", ")))
    }

    groups.missing <- groups[!groups %in% data[[groupVar]]]
    if (length(groups.missing) > 0) {
      stop(paste0("Group column does not contain group value(s): ", paste(groups.missing, collapse = ", ")))
    }
    pair <- groups[1:2]
  }

  # ---- 2. Single mean network ----------------------------------------------
  if (!is.null(single)) {
    if (!(TRUE %in% c(network, points, mean))) {
      stop("You must set at least one of points, mean, or network to TRUE to obtain a plot.")
    }

    # Restrict a matrix to the selected group's rows; NULL means "all rows"
    # and leaves the matrix untouched.
    select_rows <- function(m) {
      if (is.null(single$rows)) m else m[single$rows, , drop = FALSE]
    }

    base.color <- pick_color(1, "black")
    plot <- ena.plot(enaset = set, title = single$title)

    if (network == TRUE) {
      mean.lineweights <- colMeans(select_rows(as.matrix(set$line.weights))) * networkMultiplier
      plot <- ena.plot.network(plot, network = mean.lineweights, colors = base.color)
    }

    if (points == TRUE) {
      points.for.plot <- select_rows(as.matrix(set$points))
      plot <- ena.plot.points(enaplot = plot, points = points.for.plot, colors = base.color)
    }

    if (mean == TRUE) {
      points.for.plot <- select_rows(as.matrix(set$points))
      plot <- ena.plot.group(plot, points.for.plot, colors = base.color, labels = single$label, confidence.interval = confidence.interval)
    }

    set$plots[[length(set$plots) + 1]] <- plot

    if (print.plots == TRUE) {
      print(set$plots)
    }

    return(set)
  }

  # ---- 3. Two-group subtraction --------------------------------------------
  set <- ena.plot.subtraction(
    set = set,
    groupVar = groupVar,
    group1 = pair[1],
    group2 = pair[2],
    points = points,
    mean = mean,
    network = network,
    networkMultiplier = networkMultiplier,
    subtractionMultiplier = subtractionMultiplier,
    group1.color = pick_color(1, "blue"),
    group2.color = pick_color(2, "red"),
    confidence.interval = confidence.interval,
    ...
  )

  if (print.plots == TRUE) {
    print(set$plots)
  }

  return(set)
}


####
#' @title ENAset R6class
#'
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export

#' @field enadata An \code{\link{ENAdata}} object originally used to create the set
#' @field points.raw A data frame containing accumulated adjacency (co-occurrence) vectors per unit
#' @field points.normed.centered A data frame of centered normed accumulated adjacency (co-occurrence) vectors for each unit
#' @field points.rotated A data frame of point positions for number of dimensions specified in ena.make.set (i.e., the centered, normed, and rotated data)
#' @field line.weights A data frame of connections strengths per unit (Data frame of normed accumulated adjacency (co-occurrence) vectors for each unit)
#' @field node.positions - A data frame of positions for each code
#' @field codes - A vector of code names
#' @field rotation.set - An \code{\link{ENARotationSet}} object
#' @field variance - A vector of variance accounted for by each dimension specified
#' @field centroids - A matrix of the calculated centroid positions
#' @field function.call - The string representation of function called
#' @field function.params - A list of all parameters sent to function call
#' @field rotation_dists TBD
#' @field points.rotated.scaled TBD
#' @field points.rotated.non.zero TBD
#' @field line.weights.unrotated TBD
#' @field line.weights.non.zero TBD
#' @field correlations A data frame of spearman and pearson correlations for each dimension specified
#' @field center.align.to.origin - align point and centroid centers to origin
####
ENAset <- R6::R6Class("ENAset",
  public = list(


  ## Public Functions ----
    #' Create ENAset
    #'
    #' Stores the inputs and options; no computation happens until
    #' `process()` is called.
    #'
    #' @param enadata ENAdata-like object; its `codes` and
    #'   `adjacency.vectors` members are read by this class
    #' @param dimensions requested number of dimensions (capped at the number
    #'   of rotated columns during processing)
    #' @param norm.by function applied to the raw points to produce line weights
    #' @param rotation.by function called as `rotation.by(set, rotation.params)`
    #'   to build the rotation set when `rotation.set` is not supplied
    #' @param rotation.params passed through to `rotation.by`
    #' @param rotation.set optional `ENARotationSet` to reuse instead of
    #'   computing a new rotation
    #' @param node.position.method function called with the set; expected to
    #'   return a list with `node.positions` and `centroids`
    #' @param endpoints.only stored in `function.params` (not otherwise used by
    #'   this class)
    #' @param center.align.to.origin logical; when TRUE only rows with a
    #'   non-zero line-weight sum are centered, otherwise all rows are centered
    #' @param ... additional arguments, stored privately
    #'
    #' @return ENAset
    initialize = function(
        enadata,
        dimensions = 2,

        norm.by = fun_sphere_norm,

        rotation.by = ena.svd.R6,
        rotation.params = NULL,
        rotation.set = NULL,

        #center.data = center_data_c,    ### made local to run
        node.position.method = lws.positions.sq.R6,
        endpoints.only = TRUE,
        center.align.to.origin = TRUE,
        ...
    ) {
      self$enadata <- enadata

      private$dimensions <- dimensions

      self$codes <- enadata$codes

      self$function.call <- sys.call(-1)

      self$function.params$norm.by <- norm.by    #was sphere_norm
      #self$function.params$center.data <- center.data;
      self$function.params$node.position.method <- node.position.method    #was position.method
      self$function.params$rotation.by <- rotation.by
      self$function.params$rotation.params <- rotation.params
      self$function.params$rotation.set <- rotation.set
      self$function.params$endpoints.only <- endpoints.only
      self$function.params$center.align.to.origin <- center.align.to.origin

      # run() reads the public field (and copies it back over
      # function.params), so the field must mirror the constructor argument;
      # otherwise `center.align.to.origin = FALSE` is silently ignored.
      self$center.align.to.origin <- center.align.to.origin

      private$args <- list(...)
    },


    #' Process ENAset
    #'
    #' Runs normalisation, centering, rotation, node positioning and the
    #' variance computation.
    #'
    #' @return ENASet
    process = function() {
      return(private$run())
    },

    #' Get property from object
    #'
    #' Looks the key up among the private members first and falls back to the
    #' public members, so the default key `"enadata"` resolves.
    #'
    #' @param x character key to retrieve from object
    #' @return value from object at x, or NULL when no such member exists
    get = function(x = "enadata") {
      value <- private[[x]]
      if (is.null(value)) {
        value <- self[[x]]
      }
      return(value)
    },

  ## Public Properties ----
    rotation_dists = NULL,  #leave for now - to be removed for a temp variable
    enadata = NULL,
    points.raw = NULL,    #was data$raw
    points.normed.centered = NULL,    #was data$centered$normed
    points.rotated = NULL,    #was data$centered$rotated
    points.rotated.scaled = NULL,
    points.rotated.non.zero = NULL,
    line.weights = NULL,   #was data$normed
    line.weights.non.zero = NULL,
    line.weights.unrotated = NULL,
    node.positions = NULL,  #was nodes$positions$scaled
    codes = NULL,
    rotation.set = NULL,   ## new - ENARotation object
    correlations = NULL,   #not formerly listed, comes from optimized node positions in egr.positions
    variance = NULL,     #was self$data$centered$latent
    centroids = NULL,
    center.align.to.origin = TRUE,
    function.call = NULL,     #new - string reping function call
    function.params = list(   #list containing parameters function was called with
      norm.by = NULL,
      node.position.method = NULL,
      rotation.by = NULL,
      rotation.params = NULL,
      endpoints.only = NULL,
      center.align.to.origin = TRUE
    )
  ),

  private = list(

    ## Private Properties ----
    args = NULL,
    data.original = NULL,
    optim = NULL,

    #moved from public
    dimensions = 2,

    ## Private Functions ----
    # Full processing pipeline; returns `self` so calls can be chained.
    run = function() {
      df <- self$enadata$adjacency.vectors

      # Backup of ENA data, this is not touched again.
      #private$data.original = df[,grep("adjacency.code", colnames(df)), with=F];
      private$data.original <- df

      # carry this out for node positioning: the public field is the source
      # of truth (it may have been changed after construction)
      self$function.params$center.align.to.origin <- self$center.align.to.origin

      # Copy of the original data, this is used for all
      # further operations. Unlike, `data.original`, this
      # is likely to be overwritten.
      self$points.raw <- data.table::copy(private$data.original)

      ###
      # Normalize the raw data using self$function.params$norm.by
      # (constructor default: fun_sphere_norm)
      ###
      self$line.weights <- self$function.params$norm.by(self$points.raw)

      ###
      # Convert the string vector of code names to their corresponding
      # co-occurrence names and set as colnames for the self$line.weights
      ###
      codeNames_tri <- svector_to_ut(self$enadata$codes)

      colnames(self$line.weights) <- codeNames_tri
      # set the rownames to that of the original ENAdata file object
      rownames(self$line.weights) <- rownames(df)

      attr(self$line.weights, opts$UNIT_NAMES) <- attr(df, opts$UNIT_NAMES) #df[, .SD, with=T, .SDcols=self$enadata$get("unitsBy")];

      ###
      # Center the normed data
      # FIX - store as $data$centered
      ###
      #### ISSUE
      if (self$center.align.to.origin) {
        # only centers non-zero networks; all-zero rows are left as they are
        self$points.normed.centered <- self$line.weights

        non_zero_rows <- rowSums(as.matrix(self$line.weights)) != 0
        self$points.normed.centered[non_zero_rows, ] <- center_data_c(self$line.weights[non_zero_rows, ])
      }
      else {
        self$points.normed.centered <- center_data_c(self$line.weights)
      }
      colnames(self$points.normed.centered) <- codeNames_tri
      rownames(self$points.normed.centered) <- rownames(df)
      attr(self$points.normed.centered, opts$UNIT_NAMES) <- attr(self$enadata$adjacency.vectors.raw, opts$UNIT_NAMES)

      ###
      # Generate and Assign the rotation set
      #  - a supplied rotation.set takes precedence over rotation.by
      ###
      if (is.function(self$function.params$rotation.by) && is.null(self$function.params$rotation.set)) {
        self$rotation.set <- do.call(self$function.params$rotation.by, list(self, self$function.params$rotation.params))
      }
      else if (!is.null(self$function.params$rotation.set)) {
        if (is(self$function.params$rotation.set, "ENARotationSet")) {
          print("Using custom rotation.set.")

          self$rotation.set <- self$function.params$rotation.set
        } else {
          stop("Supplied rotation.set is not an instance of ENARotationSet")
        }
      }
      else {
        stop("Unable to find or create a rotation set")
      }

      ###
      # Generate the rotated points
      ###
      self$points.rotated <- self$points.normed.centered %*% self$rotation.set$rotation
      private$dimensions <- min(private$dimensions, ncol(self$points.rotated))
      attr(self$points.rotated, opts$UNIT_NAMES) <- attr(self$points.normed.centered, opts$UNIT_NAMES)

      ###
      # Calculate node positions
      #  - The supplied method is expected to return a list
      #    with two keys, "node.positions" and "centroids"
      #  - NOTE(review): the check below only rejects *unexpected* names; it
      #    does not verify that both expected keys are present - confirm intent
      ###
      if (!is.null(self$rotation.set) && is.null(self$function.params$rotation.set)) {
        positions <- self$function.params$node.position.method(self)
        if (all(names(positions) %in% c("node.positions", "centroids"))) {
          self$node.positions <- positions$node.positions
          self$centroids <- positions$centroids

          self$rotation.set$node.positions <- positions$node.positions
        }
        else {
          # Collapse the received names so a single message is produced
          # instead of one message per name.
          stop(paste(
            "The node position method didn't return back the expected objects:",
            "\tExpected: c('node.positions','centroids')",
            paste0("\tReceived: ", paste(names(positions), collapse = ", ")),
            sep = "\n"
          ))
        }
      }
      else {
        # A rotation set was supplied: reuse its node positions.
        if (!is.null(self$function.params$rotation.set) && !is.null(self$function.params$rotation.set$node.positions)) {
          self$node.positions <- self$function.params$rotation.set$node.positions
        }
        else {
          stop("Unable to determine the node positions either by calculating
                  them using `node.position.method` or using a supplied
                  `rotation.set`")
        }
      }

      ###
      # Variance: share of total variance on each rotated dimension
      ###
      variance.of.rotated.data <- var(self$points.rotated)
      diagonal.of.variance.of.rotated.data <- as.vector(diag(variance.of.rotated.data))
      self$variance <- diagonal.of.variance.of.rotated.data / sum(diagonal.of.variance.of.rotated.data)

      return(self)
    }
  )
)

### centering for projection

center.projection <- function(lws, rotation) {
  # Subtract the rotation's stored center vector from every row of `lws`.
  # `NULL$center.vec` is NULL, so one lookup covers both "no rotation" and
  # "rotation without a center vector".
  center_vec <- rotation$center.vec
  if (is.null(center_vec)) {
    stop("Supplied value for `rotation` does not have a center vector");
  }

  # Transpose so the vector recycles down the columns (one entry per
  # connection), subtract, then transpose back to units x connections.
  shifted <- t(lws) - center_vec
  t(shifted)
}

# og_lws = as.matrix(set.new$line.weights)
# set.new$rotation$center.vec = colMeans(og_lws)
#
#
#
# test = center.projection(lws = og_lws,set.new)
# centered_og = rENA:::center_data_c(as.matrix(set.new$line.weights))
# #
# View(test == centered_og)   ### DIFFERENCE IN ROUNDING
# View(round(test,3) == round(centered_og,3))

#' Compute Between-Group Scatter Matrix
#'
#' This function calculates the between-group scatter matrix (\code{SB}) for a given numeric matrix and grouping variable.
#'
#' @param A A numeric matrix of dimensions \code{m x n}, where rows represent observations and columns represent features.
#' @param g A grouping variable of length \code{m}, either a factor or a character vector, indicating group membership for each observation.
#'
#' @return A numeric matrix (\code{n x n}) representing the between-group scatter matrix (\code{SB}).
#'
#' @details
#' The function computes the total mean of the matrix \code{A} and the mean for each group defined by \code{g}.
#' It then calculates the between-group scatter matrix by summing the outer product of the mean differences, weighted by the group sizes.
#' Factor levels of \code{g} that have no observations are dropped before the
#' computation, so they contribute nothing to \code{SB}.
#'
#' @examples
#' # Example usage:
#' A <- matrix(rnorm(20), nrow = 5, ncol = 4)
#' g <- factor(c("A", "B", "A", "B", "A"))
#' SB <- rENA:::compute_SB(A, g)
compute_SB <- function(A, g) {
  if (!is.matrix(A)) stop("A must be a numeric matrix.")
  if (length(g) != nrow(A)) stop("g must have the same length as number of rows in A.")

  # factor() (unlike as.factor()) drops unused levels. An empty level would
  # give a 0-row group whose column means are NaN, and 0 * NaN = NaN would
  # then contaminate every entry of SB.
  g <- factor(g)
  n_features <- ncol(A)

  # Total mean
  mu_total <- colMeans(A)

  # Accumulator for the scatter matrix
  SB <- matrix(0, n_features, n_features)

  for (grp in levels(g)) {
    A_grp <- A[which(g == grp), , drop = FALSE]

    # Between-group component: n_g * (mu_g - mu)(mu_g - mu)^T
    mean_diff <- matrix(colMeans(A_grp) - mu_total, ncol = 1)
    SB <- SB + nrow(A_grp) * (mean_diff %*% t(mean_diff))
  }

  SB
}


#' Generalized Means Rotation (GMR) with optional subsetting and interaction control
#'
#' Computes a rotation (direction) `r` representing the contribution of the
#' first column of `X` to the multivariate ENA matrix `V`. Supports optional
#' subsetting by `groups`, optional inclusion of interaction terms when
#' computing adjusted contributions.
#'
#' @param V Numeric ENA matrix (units × connections) ready for rotation.
#' @param X Data frame or matrix of predictors; the first column is the target.
#'   Anything that is not a data frame is coerced with `as.data.frame()`.
#' @param groups Optional vector specifying target groups to subset. If `NULL`
#'   (default), all rows are used.
#' @param alpha Elastic-net mixing parameter forwarded to `get_x1_main_effect`
#'   (default `1` — Lasso).
#' @param lambda Lambda selection for `cv.glmnet` forwarded to
#'   `get_x1_main_effect` (default `"lambda.min"`).
#' @param interactions Logical; if `TRUE` (default) interactions are included when computing the adjusted contribution.
#' @param verbose Logical; if `TRUE` (default) the function emits progress
#'   messages.
#'
#' @return A numeric vector `r` (length = ncol(V)) giving the normalized
#'   rotation direction. Attributes attached:
#'   \describe{
#'     \item{`target`}{The full-length target vector (un-subsetted).}
#'     \item{`Vx1`}{The target contribution the direction was derived from:
#'         the fitted values of `lm(V ~ target)` when `X` has a single column,
#'         otherwise the result of `get_x1_main_effect()`. It is embedded in a
#'         full-length matrix (rows outside the subset are filled with zeros).}
#'   }
#'   If no valid direction can be found (including SVD failure), returns `NULL`
#'   and issues a warning.
#'
#' @examples
#' \dontrun{
#' set.seed(1)
#' V <- matrix(rnorm(200), nrow = 40)
#' X <- data.frame(group = rep(letters[1:4], each = 10),
#'                 x2 = rnorm(40), x3 = rnorm(40))
#' r_all <- gmr(V, X)
#' r_subset <- gmr(V, X, groups = c("a", "b"), interactions = TRUE)
#' }
#'
#' @seealso [get_x1_main_effect()]
#' @importFrom stats lm model.matrix
#' @importFrom glmnet cv.glmnet
#' @export

gmr <- function(V, X, groups = NULL, alpha = 1, lambda = "lambda.min",
  interactions = TRUE, verbose = TRUE) {
  # "Numerically all zero" check. isTRUE() makes it NA-safe: input containing
  # NA is reported as not-zero instead of propagating NA into an `if`.
  is_zero <- function(x, tol = 1e-12) isTRUE(all(abs(x) < tol))

  # `X[[1]]` extracts the first *column* only from a data frame; on a matrix
  # it returns a single element, which made every matrix input look like a
  # constant target. Coerce so both documented input types behave alike.
  if (!is.data.frame(X)) X <- as.data.frame(X)

  # get full target variable, namely, the first variable in X
  target_full <- X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # --- Fail if target is constant ---
  if (length(unique(target_full)) == 1) {
    warning("Target variable is constant; returning NULL.")
    return(NULL)
  }

  # --- Subset by groups if selected groups are provided ---
  if (!is.null(groups)) {
    valid_groups <- intersect(groups, unique(target_full))
    if (length(valid_groups) <= 1) {
      warning("Less than 2 valid groups selected; returning NULL.")
      return(NULL)
    }
    subset_idx <- which(target_full %in% valid_groups)
    V_sub <- V[subset_idx, , drop = FALSE]
    X_sub <- X[subset_idx, , drop = FALSE]
    target_sub <- target_full[subset_idx]
  } else { # use full data if no groups are selected
    V_sub <- V
    X_sub <- X
    target_sub <- target_full
    subset_idx <- NULL
  }

  # --- Base regression model (target only, no covariates) ---
  base_model <- lm(V_sub ~ target_sub)
  Vx1_sub <- base_model$fitted.values

  # --- Compute contributions via Lasso (if covariates exist) ---
  if (ncol(X_sub) == 1) { # no covariates, use base model
    Vx_sub <- Vx1_sub
  } else { # covariates exist, use Lasso model
    Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                 lambda = lambda, include_interactions = interactions)
  }
  if (is_zero(Vx_sub)) {
    warning("Regression resulted in zeor contribution; returning NULL.")
    return(NULL)
  }

  # --- Compute rotation direction r ---
  r <- NULL
  if (is.numeric(target_sub)) {
    if (verbose) message("Computing direction for numeric target...")
    # Slope of the (adjusted) contribution on the target, one per connection.
    slope_model <- lm(Vx_sub ~ target_sub)
    beta <- slope_model$coefficients[2, ]
    if (is_zero(beta)) {
      warning("Numerical target with zero beta; returning NULL.")
      return(NULL)
    }
    r <- beta
  } else {
    if (verbose) message("Computing direction for categorical target...")
    # Leading singular vector of the between-group scatter of the contribution.
    sb <- compute_SB(Vx_sub, target_sub)
    r <- tryCatch(svd(sb)$v[, 1], error = function(e) NULL)
  }

  # --- Give up if no usable direction was found (NULL, NA or all zero) ---
  if (is.null(r) || anyNA(r) || is_zero(r)) {
    warning("Uable to compute any valid direction; returning NULL.")
    return(NULL)
  }

  # --- Normalize ---
  r <- r / sqrt(sum(r^2))

  # --- Build full-length Vx1: subset rows filled, zeros elsewhere ---
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full[filled_rows, ] <- Vx_sub
  colnames(Vx1_full) <- colnames(V)

  # --- Attach metadata ---
  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  #attr(r, "fallback_stage") <- fallback_stage

  if (verbose) message(" gmr completed successfully ")
  return(r)
}
# the fallback mechanism is created but not used.
# Variant of gmr() that never gives up early: every failure mode falls back to
# a simpler model and ultimately to an SVD direction. The stage that produced
# the result is recorded in the "fallback_stage" attribute of the returned
# vector. Parameters have the same meaning as in gmr(). Returns NULL (with a
# warning) only when even the SVD fallbacks fail.
gmr_with_fallbacks <- function(V, X, groups = NULL, alpha = 1, lambda = "lambda.min",
                interactions = TRUE, verbose = TRUE) {
  # "Numerically all zero" check. isTRUE() makes it NA-safe: input containing
  # NA is reported as not-zero instead of propagating NA into an `if`.
  is_zero <- function(x, tol = 1e-12) isTRUE(all(abs(x) < tol))

  # `X[[1]]` extracts a column only from a data frame; on a matrix it returns
  # one element, which silently sent every matrix input down the
  # constant-target SVD path below.
  if (!is.data.frame(X)) X <- as.data.frame(X)

  # get full target variable
  target_full <- X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # --- Early SVD fallback if target is constant ---
  if (length(unique(target_full)) == 1) {
    if (verbose) message("Target variable is constant; falling back to SVD(V)")
    r <- tryCatch(svd(V)$v[, 1], error = function(e) NULL)
    fallback_stage <- "constant target SVD"

    if (is.null(r)) {
      warning("Unable to compute any valid direction; returning NULL.")
      return(NULL)
    }

    r <- r / sqrt(sum(r^2))
    # No target contribution exists in this case, so Vx1 is all zeros.
    Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
    colnames(Vx1_full) <- colnames(V)

    attr(r, "target") <- target_full
    attr(r, "Vx1") <- Vx1_full
    attr(r, "fallback_stage") <- fallback_stage
    return(r)
  }

  # --- Subset by groups if provided (all rows when fewer than 2 match) ---
  subset_idx <- NULL
  if (!is.null(groups)) {
    valid_groups <- intersect(groups, unique(target_full))
    if (length(valid_groups) > 1) {
      subset_idx <- which(target_full %in% valid_groups)
    } else if (verbose) {
      message("Less than 2 valid groups selected; using all rows instead")
    }
  }
  if (is.null(subset_idx)) {
    V_sub <- V
    X_sub <- X
    target_sub <- target_full
  } else {
    V_sub <- V[subset_idx, , drop = FALSE]
    X_sub <- X[subset_idx, , drop = FALSE]
    target_sub <- target_full[subset_idx]
  }

  # --- Base regression model (target only, no covariates) ---
  base_model <- lm(V_sub ~ target_sub)
  Vx1_sub <- base_model$fitted.values

  # --- Compute contributions via Lasso (if covariates exist) ---
  if (ncol(X_sub) == 1) {
    Vx_sub <- Vx1_sub
    fallback_stage <- "no covariates"
  } else {
    Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                 lambda = lambda, include_interactions = interactions)
    fallback_stage <- if (interactions) "with interactions" else "no interactions"

    # Retry without interactions only when the first fit actually used them;
    # otherwise the "retry" would just repeat the identical model.
    if (interactions && is_zero(Vx_sub)) {
      if (verbose) message("⚠️ Lasso with interactions gave zero contribution; trying without interactions.")
      Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                   lambda = lambda, include_interactions = FALSE)
      fallback_stage <- "no interactions"
    }

    if (is_zero(Vx_sub)) {
      if (verbose) message("⚠️ Lasso without interactions gave zero contribution; falling back to simple model.")
      Vx_sub <- Vx1_sub
      fallback_stage <- "no covariates"
    }
  }

  # --- Compute rotation direction r ---
  if (is.numeric(target_sub)) {
    if (verbose) message("Computing direction for numeric target...")
    slope_model <- lm(Vx_sub ~ target_sub)
    beta <- slope_model$coefficients[2, ]
    if (is_zero(beta)) {
      # NOTE: the message mentions V_sub, but the SVD is taken of the
      # contribution matrix Vx_sub.
      if (verbose) message("⚠️ Beta is zero; falling back to SVD(V_sub).")
      r <- tryCatch(svd(Vx_sub)$v[, 1], error = function(e) NULL)
      fallback_stage <- "SVD fallback"
    } else {
      r <- beta / sqrt(sum(beta^2))
    }
  } else {
    if (verbose) message("Computing direction for categorical target...")
    sb <- compute_SB(Vx_sub, target_sub)
    r <- tryCatch(svd(sb)$v[, 1], error = function(e) NULL)
    fallback_stage <- "SVD of SB"
  }

  # --- Final SVD fallback if r is NULL, NA or zero ---
  if (is.null(r) || anyNA(r) || is_zero(r)) {
    warning("⚠️ All levels failed; using SVD(V_sub)$v[,1] as final fallback.")
    r <- tryCatch(svd(V_sub)$v[, 1], error = function(e) NULL)
    fallback_stage <- "final SVD"
  }

  if (is.null(r)) {
    warning("❌ Unable to compute any valid direction; returning NULL.")
    return(NULL)
  }

  # --- Normalize ---
  r <- r / sqrt(sum(r^2))

  # --- Build full-length Vx1: subset rows filled, zeros elsewhere ---
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full[filled_rows, ] <- Vx_sub
  colnames(Vx1_full) <- colnames(V)

  # --- Attach metadata ---
  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  attr(r, "fallback_stage") <- fallback_stage

  if (verbose) message("✅ gmr completed successfully (", fallback_stage, ").")

  return(r)
}


gmr_bk1 <- function(V, X) {
  # Backup version of the means rotation.
  #   V: matrix, ENA set points for projection
  #   X: data frame containing all predictor variables, first as target
  # Returns the unit-length direction `r` with attributes "target" (the target
  # vector) and "Vx1" (fitted values of lm(V ~ target)).

  # Target is the first column of X, flattened to an atomic vector
  # (list-columns are unlisted one level).
  target <- X[[1]]
  if (is.list(target)) {
    target <- unlist(target, recursive = FALSE)
  }
  target <- as.vector(target)

  # Main effect of the target without adjustment for covariates.
  base_fit <- lm(V ~ target)
  #model <- lm(V ~ X[, 1]); # simple linear model on X[1]
  Vx1 <- base_fit$fitted.values

  # Main effect adjusted for covariates (Lasso) when covariates exist.
  Vx <- if (ncol(X) == 1) Vx1 else get_x1_main_effect(V, X)

  if (is.numeric(target)) {
    # Numeric target: reuse the slope row of the initial model.
    print("target is numeric")
    slope <- coef(base_fit)[2, ]
    r <- slope / sqrt(sum(slope^2))
  } else {
    # Non-numeric target: leading right singular vector of the
    # between-group scatter matrix.
    print("target is NOT numeric")
    r <- svd(compute_SB(Vx, target))$v[, 1]
  }

  # project r to span of row vectors of V (disabled)
  #model <- lm(r ~ t(V) + 0)
  #r<- Vx1 <- model$fitted.values;
  #r <- t(V) %*% coef(lm(r ~ t(V) + 0));    # Projection: r ~ V^T %*% beta
  #r <- r / sqrt(sum(r^2));
  attr(r, "target") <- target
  attr(r, "Vx1") <- Vx1 # target contribution
  return(r)
}

gmr2_bk <- function(V, X, groups = NULL) {
  # Backup version of the means rotation with optional group subsetting.
  # Returns the direction `r` with attributes "target" (full target vector)
  # and "Vx1" (unadjusted fitted values, zero-filled outside the subset).

  # Target is the first column of X, flattened to an atomic vector.
  target_full <- X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # Subset only when every requested group occurs in the target.
  use_subset <- !is.null(groups) && all(groups %in% unique(target_full))
  if (!is.null(groups) && !use_subset) {
    warning("Specified groups not found; using all data.")
  }

  if (use_subset) {
    subset_idx <- which(target_full %in% groups)
    V_sub <- V[subset_idx, , drop = FALSE]
    X_sub <- X[subset_idx, , drop = FALSE]
    target_sub <- target_full[subset_idx]
  } else {
    subset_idx <- NULL
    V_sub <- V
    X_sub <- X
    target_sub <- target_full
  }

  # Unadjusted contribution of the target.
  base_fit <- lm(V_sub ~ target_sub)
  Vx1_sub <- base_fit$fitted.values

  # Covariate-adjusted contribution when covariates exist.
  Vx_sub <- if (ncol(X_sub) == 1) Vx1_sub else get_x1_main_effect(V_sub, X_sub)

  if (is.numeric(target_sub)) {
    # Numeric target: normalised slope row of the base model.
    slope <- coef(base_fit)[2, ]
    r <- slope / sqrt(sum(slope^2))
  } else {
    # Categorical target: leading right singular vector of the
    # between-group scatter matrix.
    r <- svd(compute_SB(Vx_sub, target_sub))$v[, 1]
  }

  # Build full Vx1: fill subset rows, zeros elsewhere
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  Vx1_full[filled_rows, ] <- Vx1_sub
  colnames(Vx1_full) <- colnames(V)

  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  return(r)
}

#' Extract the Main Effect of X on V with Optional Interactions
#'
#' Computes the main-effect contribution of the first column of `X` (the
#' "target") to the multivariate ENA matrix `V`. The target's coefficients are
#' estimated jointly with the other covariates (and, optionally, all pairwise
#' interactions) and the fitted contribution of the target's main-effect
#' columns is returned as a matrix (units × connections).
#'
#' @param V A numeric matrix (units × connections) of dependent variables.
#' @param X A data frame or matrix of predictors / covariates. The **first**
#'   column is treated as the target variable whose contribution will be extracted.
#' @param alpha Elastic-net mixing parameter passed to `cv.glmnet`. `alpha = 1`
#'   (default) is Lasso; `alpha = 0` is ridge.
#' @param lambda Character or numeric. Which lambda from the `cv.glmnet` fit to
#'   use; e.g. `"lambda.min"` (default) or `"lambda.1se"`, or a numeric value.
#'   Only used when the penalized fit is used (see Details).
#' @param include_interactions Logical; if `TRUE`, the model additionally
#'   contains all pairwise interaction terms (`~ .^2`). The returned
#'   contribution is always computed from the target's main-effect columns
#'   only (default: `FALSE`, main effects only, `~ .`).
#'
#' @return A numeric matrix with one row per row of the model matrix and one
#'   column per column of `V`, containing the estimated contribution of
#'   `X[,1]` to each response. If no main-effect column is found for the
#'   target, a zero matrix with the dimensions of `V` is returned and a
#'   warning is issued.
#'
#' @details
#' The model matrix is built with `model.matrix()` and the intercept column is
#' dropped. The target's main-effect columns are identified through the
#' `"assign"` attribute of the model matrix (term 1), so dummy columns of a
#' factor target are included while other covariates whose names merely start
#' with the target's name are not.
#'
#' If the number of model columns `p` satisfies `p <= nrow(X) - 10`, ordinary
#' least squares (`lm`) is used. Otherwise a multivariate `cv.glmnet`
#' (`family = "mgaussian"`) is fitted with a `penalty.factor` of 0 for the
#' target columns; if that fit fails, a warning is issued and OLS is used as a
#' fallback (coefficients that OLS cannot estimate are treated as 0).
#'
#' @examples
#' \dontrun{
#' set.seed(1)
#' V <- matrix(rnorm(50), ncol = 5)
#' X <- data.frame(CONFIDENCE = rnorm(10), Condition = factor(rep(1:2, 5)))
#' # main effects only
#' Vx_main <- get_x1_main_effect(V, X, include_interactions = FALSE)
#' # include interactions
#' Vx_full <- get_x1_main_effect(V, X, include_interactions = TRUE, alpha = 0) # ridge
#' }
#'
#' @seealso [gmr2()] for the rotation routine that uses this function.
#' @importFrom stats lm model.matrix
#' @importFrom glmnet cv.glmnet
#' @export
get_x1_main_effect <- function(V, X, alpha = 1, lambda = "lambda.min", include_interactions = FALSE) {
  # 1. Formula & Model Matrix
  #    Record which model term produced each column *before* dropping the
  #    intercept, because subsetting discards the "assign" attribute.
  formula_str <- if (include_interactions) "~ .^2" else "~ ."
  mm_full <- model.matrix(stats::as.formula(formula_str), data = X)
  col_term <- attr(mm_full, "assign")[-1]
  mm <- mm_full[, -1, drop = FALSE]

  # 2. Identify Main Effect Columns for x1
  #    `.` expands to the columns of X in order and main effects precede
  #    interactions, so term 1 is exactly the main effect of X[, 1]. Matching
  #    on the term index (instead of a name prefix) avoids picking up other
  #    covariates whose names start with the target's name.
  x1_cols <- which(col_term == 1L)

  if (length(x1_cols) == 0) {
    warning("No main effect columns found for X[,1]; returning zeros.")
    return(matrix(0, nrow = nrow(V), ncol = ncol(V), dimnames = list(NULL, colnames(V))))
  }

  # 3. Penalty Factors: leave the target columns unpenalized
  p <- ncol(mm)
  penalty_factors <- rep(1, p)
  penalty_factors[x1_cols] <- 0

  # 4. Fitting Logic
  x1_contribution <- matrix(0, nrow = nrow(V), ncol = ncol(V), dimnames = list(NULL, colnames(V)))
  use_ols <- (p <= (nrow(X) - 10)) # Heuristic: Use OLS only if we have enough degrees of freedom

  if (!use_ols) {
    fit <- tryCatch(
      glmnet::cv.glmnet(x = mm, y = V, family = "mgaussian",
                        alpha = alpha, penalty.factor = penalty_factors),
      error = function(e) {
        # Do not hide the failure: the OLS fallback below is a different estimator
        warning("cv.glmnet failed (", conditionMessage(e), "); falling back to OLS.",
                call. = FALSE)
        NULL
      }
    )

    if (!is.null(fit)) {
      # One coefficient matrix per response
      coefs_list <- stats::coef(fit, s = lambda)
      for (i in seq_along(coefs_list)) {
        # Drop the intercept row; densify to avoid sparse matrix indexing issues
        beta_all <- as.matrix(coefs_list[[i]])[-1, , drop = FALSE]
        beta_x1 <- beta_all[x1_cols, , drop = FALSE]
        x1_contribution[, i] <- mm[, x1_cols, drop = FALSE] %*% beta_x1
      }
      return(x1_contribution)
    }
    use_ols <- TRUE
  }

  if (use_ols) {
    fit_ols <- lm(V ~ mm)
    # as.matrix handles the 'incorrect number of dimensions' for single response
    beta_ols <- as.matrix(stats::coef(fit_ols))[-1, , drop = FALSE]
    beta_x1_ols <- beta_ols[x1_cols, , drop = FALSE]

    # Handle NAs that OLS produces for rank-deficient matrices
    beta_x1_ols[is.na(beta_x1_ols)] <- 0
    x1_contribution <- mm[, x1_cols, drop = FALSE] %*% beta_x1_ols
  }

  x1_contribution
}


#' ENA line weights as matrix
#'
#' @param x ena.line.weights data.table to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#' @param square Logical. If TRUE, each row is expanded into a square
#'   code-by-code matrix whose upper triangle holds that row's values (all
#'   other cells are NA) and a list with one such matrix per row is returned.
#'
#' @return If square is FALSE (default), a matrix of the non-metadata columns,
#'   otherwise a list of square matrices (empty when x has no rows)
#' @export
as.matrix.ena.line.weights <- function(x, ..., square = FALSE) {
  x.unclass <- data.table::as.data.table(unclass(x))
  rows <- x.unclass[, !find_meta_cols(x.unclass), with = FALSE]

  if (square) {
    # Recover the number of codes k from the k * (k - 1) / 2 connection columns
    upperTriSize <- ncol(rows)
    number <- (ceiling(sqrt(2 * upperTriSize))^2) - (2 * upperTriSize)
    # Code names are the two sides of every "A & B" column name
    codes <- unique(unlist(strsplit(colnames(rows), split = " & ")))

    # seq_len() keeps a zero-row input from iterating over c(1, 0)
    lapply(seq_len(nrow(rows)), function(unit) {
      m <- matrix(NA, number, number, dimnames = list(codes, codes))
      m[upper.tri(m)] <- as.numeric(rows[unit, ])
      m
    })
  } else {
    as.matrix(remove_meta_data(rows), ...)
  }
}

#' ENA rotations as matrix
#'
#' @param x ena.rotation.matrix to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.rotation.matrix <- function(x, ...) {
  # Strip the leading class, then convert what is left after the metadata
  # columns have been removed
  class(x) <- class(x)[-1]
  as.matrix(remove_meta_data(x), ...)
}

#' ENA points as matrix
#'
#' @param x ena.points to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.points <- function(x, ...) {
  # Strip the leading class, then convert what is left after the metadata
  # columns have been removed
  class(x) <- class(x)[-1]
  as.matrix(remove_meta_data(x), ...)
}

#' Matrix without metadata
#'
#' @param x Object to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.matrix <- function(x, ...) {
  # Strip the leading class, then convert what is left after the metadata
  # columns have been removed
  class(x) <- class(x)[-1]
  as.matrix(remove_meta_data(x), ...)
}

#' ENA nodes as matrix
#'
#' @param x ena.nodes to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.nodes <- function(x, ...) {
  # Strip the leading class before subsetting
  class(x) <- class(x)[-1]
  # Everything except the "code" column goes into the matrix
  without_code <- x[, -c("code")]
  as.matrix(without_code, ...)
}

#' ENA row connections as matrix
#'
#' @param x ena.row.connections to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.row.connections <- function(x, ...) {
  # Strip the leading class before subsetting
  class(x) <- class(x)[-1]
  # Keep only the columns that are ena.co.occurrence objects
  is_co_occurrence <- sapply(x, is, class2 = "ena.co.occurrence")
  as.matrix(x[, is_co_occurrence, with = FALSE], ...)
}


#' ENA Connections as a matrix
#'
#' @param x ena.connections object
#' @param ... additional arguments to be passed to or from methods. The
#'   following named arguments are read from `...`:
#'   \describe{
#'     \item{square}{Logical. If TRUE, each row is converted to a square
#'       code-by-code matrix whose upper triangle holds that row's values
#'       (default: FALSE)}
#'     \item{simplify}{Logical. If TRUE and `square` is TRUE, the first square
#'       matrix is returned instead of a list (default: TRUE only when `x` has
#'       exactly one row)}
#'     \item{names}{Optional names; used as the names of the returned list
#'       when `square` is TRUE and the result is not simplified, otherwise as
#'       the row names of the returned matrix}
#'   }
#'
#' @return If square is FALSE (default), a matrix with all metadata columns removed, otherwise a list with square matrices
#' @export
as.matrix.ena.connections <- function(x, ...) {
  class(x) <- class(x)[-1]
  rows <- as.data.frame(x)[, !find_meta_cols(x), drop = FALSE]

  # Optional settings travel through `...`
  args <- list(...)
  square <- if (is.null(args$square)) FALSE else args$square
  names <- args$names
  simplify <- if (is.null(args$simplify)) nrow(x) == 1 else args$simplify

  if (square) {
    # Recover the number of codes k from the k * (k - 1) / 2 connection columns
    upperTriSize <- ncol(rows)
    number <- (ceiling(sqrt(2 * upperTriSize))^2) - (2 * upperTriSize)
    # Code names are the two sides of every "A & B" column name
    codes <- unique(unlist(strsplit(colnames(rows), split = " & ")))

    # seq_len() keeps a zero-row input from iterating over c(1, 0)
    cm <- lapply(seq_len(nrow(rows)), function(unit) {
      m <- matrix(NA, number, number, dimnames = list(codes, codes))
      m[upper.tri(m)] <- as.numeric(rows[unit, ])
      m
    })

    if (simplify && length(cm) > 0) {
      cm <- cm[[1]]
    } else {
      names(cm) <- names
    }
  } else {
    cm <- as.matrix(rows)
    rownames(cm) <- names
  }

  cm
}

##
#' @title Plot points on an ENAplot
#'
#' @description Plot all or a subset of the points of an ENAplot using the plotly plotting library
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param points A data.frame or matrix where the first two columns are X and Y coordinates of points to plot in a projected ENA space defined in ENAplot; default: NULL, which plots the points of enaplot$enaset
#' @param point.size A numeric vector of point marker sizes; if one is given it is used for all points; default: enaplot$point$size
#' @param labels A character vector of point labels, length nrow(points); default: NULL
#' @param confidence.interval A character determining markings to use for confidence intervals, choices: none, crosshairs, box, default: none
#' @param outlier.interval A character determining markings to use for outlier interval, choices: none, crosshairs, box, default: none
#' @param confidence.interval.values A matrix/dataframe where columns are CI x and y values for each point
#' @param outlier.interval.values A matrix/dataframe where columns are OI x and y values for each point
#' @param shape A character which determines the shape of point markers, choices: circle, square, triangle-up, diamond, default: circle
#' @param colors A character vector of the point marker colors; if one given it is used for all, otherwise must be same length as points; default: black
#' @param label.offset character: top left (default), top center, top right, middle left, middle center, middle right, bottom left, bottom center, bottom right
#' @param label.group A string used to group the labels in the legend. Items plotted with the same label.group will show/hide together when clicked within the legend.
#' @param label.font.size An integer which determines the font size for point labels, default: enaplot$font.size
#' @param label.font.color A character which determines the color of label font, default: enaplot$font.color
#' @param label.font.family	A character which determines label font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
#' @param show.legend Logical indicating whether to show the point labels in the legend
#' @param legend.name Character indicating the name to show above the plot legend
#' @param texts A character vector of text to draw next to the point markers, one entry per point; default: NULL
#' @param ... additional parameters addressed in inner function
#'
#'
#' @seealso \code{\link{ena.plot}}, \code{\link{ENAplot}}, \code{\link{ena.plot.group}}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'       accum$meta.data$Condition=="FirstGame",
#'       accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' plot = ena.plot(set)
#'
#' group1.points = set$points[set$meta.data$Condition == "FirstGame",]
#' group2.points = set$points[set$meta.data$Condition == "SecondGame",]
#' plot = ena.plot.points(plot, points = group1.points);
#' plot = ena.plot.points(plot, points = group2.points);
#' print(plot);
#'
#' @return \code{\link{ENAplot}} The ENAplot provided to the function, with its plot updated to include the new points.
##
ena.plot.points = function(
  enaplot,

  points = NULL,    #vector of unit names or row indices
  point.size = enaplot$point$size,
  labels = NULL, #unique(enaplot$enaset$enadata$unit.names),
  label.offset = "top left",
  label.group = NULL,

  label.font.size = NULL, #enaplot$get("font.size"),
  label.font.color = NULL, #enaplot$get("font.color"),
  label.font.family = NULL, #enaplot$get("font.family"),

  shape = "circle",
  colors = NULL, # c("blue"), #rep(I("black"), nrow(points)),

  confidence.interval.values = NULL,
  confidence.interval = c("none", "crosshairs", "box"),

  outlier.interval.values = NULL,
  outlier.interval = c("none", "crosshairs", "box"),
  show.legend = TRUE,
  legend.name = "Points",
  texts = NULL,
  ...
) {
  ###
  # Parameter Checking and Cleaning
  ###
    # Any label font setting left NULL is taken from the plot itself
    env = environment();
    for(n in c("font.size", "font.color", "font.family")) {
      if(is.null(get(paste0("label.",n))))
        env[[paste0("label.",n)]] = enaplot$get(n);
    }

    # Default to the points stored on the plot's set
    if(is.null(points)) {
      # stop("Must provide points to plot.")
      points = enaplot$enaset$points
    }

    # Normalize `points` to a data.table: a bare numeric vector is treated as
    # a single point (one row)
    if(is(points, "numeric")){
      points = matrix(points);
      dim(points) = c(1,nrow(points))
      points.layout = data.table::data.table(points);
    }
    else if (data.table::is.data.table(points)) {
      # points.layout = remove_meta_data(points)
      points.layout = data.table::copy(points)
    }
    else {
      points.layout = data.table::data.table(points);
    }

    if(!is.character(label.font.family)) {
      label.font.family = enaplot$get("font.family");
    }

    confidence.interval = match.arg(confidence.interval);
    outlier.interval = match.arg(outlier.interval);

    # shape = match.arg(shape);
    valid.shapes = c("circle", "square", "triangle-up", "diamond");
    if(!all(shape %in% valid.shapes))
      stop(sprintf( "Unrecognized shapes: %s", paste(unique(shape[!(shape %in% valid.shapes)]), collapse = ", ") ))
    if(length(shape) == 1)
      shape = rep(shape, nrow(points.layout))

    valid.label.offsets = c("top left","top center","top right","middle left","middle center","middle right","bottom left","bottom center","bottom right");
    if(!all(label.offset %in% valid.label.offsets))
      stop(sprintf( "Unrecognized label.offsets: %s", paste(unique(label.offset[!(label.offset %in% valid.label.offsets)]), collapse = ", ") ))
    if(length(label.offset) == 1)
      label.offset = rep(label.offset, nrow(points.layout))

    # Only one of the two intervals can be drawn as crosshairs; the outlier
    # interval is demoted to a box
    if(grepl("^c", confidence.interval) && grepl("^c", outlier.interval)) {
      message("Confidence Interval and Outlier Interval cannot both be crosshair");
      message("Plotting Outlier Interval as box");
      outlier.interval = "box";
    }

    # Recycle single values so they can be indexed per point below
    if(length(colors) == 1) {
      colors = rep(colors, nrow(points.layout))
    }
    if(length(point.size) == 1)
      point.size = rep(point.size, nrow(points.layout))
    # Without labels there is nothing to show in the legend
    if(is.null(labels))
      show.legend = FALSE
  ###
  # END: Parameter Checking and Cleaning
  ###

  ###
  # Set error value for CI|OI crosshair on plot
  ###
    error = list(x = list(visible=TRUE, type="data"), y = list(visible=TRUE, type="data"));
    int.values = NULL;
    if(grepl("^c", confidence.interval) && !is.null(confidence.interval.values)) {
      int.values = confidence.interval.values;
    }
    else if(grepl("^c", outlier.interval) && !is.null(outlier.interval.values)) {
      int.values = outlier.interval.values;
    }
    # When int.values is NULL these assignments leave `array` unset
    error$x$array = int.values[, 1];
    error$y$array = int.values[, 2];
  ###
  # END: Set error value for crosshair on plot
  ###

  ###
  # Set box value for CI|OI box on plot
  #####
    # box.label is only defined (and only used) when box.values is set
    box.values = NULL;
    if(grepl("^b", confidence.interval) && !is.null(confidence.interval.values)) {
      box.values = confidence.interval.values;
      box.label = "Conf. Int.";
    }
    if(grepl("^b", outlier.interval) && !is.null(outlier.interval.values)) {
      box.values = outlier.interval.values;
      box.label = "Outlier Int.";
    }
  ######
  # END: Set box value for CI|OI box on plot
  ###

  ###
  # Plot
  #####
    points.matrix = remove_meta_data(points.layout)
    colnames(points.matrix) = paste0("X", seq_len(ncol(points.matrix)));
    # The axes are symmetric around 0 (see below), so the extent that matters
    # is the largest absolute coordinate, not the largest signed one
    this.max = max(abs(as.matrix(points.matrix)));
    # One trace per point; seq_len() adds nothing when there are no rows
    for(m in seq_len(nrow(points.matrix))) {
      enaplot$plot = plotly::add_trace(
        p = enaplot$plot,
        data = points.matrix[m,],
        type ="scatter",
        x = ~X1, y = ~X2,
        mode = "markers+text",
        marker = list(
          symbol = shape[m],
          color = colors[m],
          size = point.size[m]
        ),
        error_x = error$x, error_y = error$y,
        showlegend = show.legend,
        # legendgroup = label.group,
        # legendgroup = ifelse(!is.null(box.label), labels[1], NULL),
        name = labels[m],
        text = texts[m], #labels[m],
        textfont = list(
          family = label.font.family,
          size = label.font.size,
          color = label.font.color
        ),
        legendgroup = legend.name,
        textposition = label.offset[m],
        hoverinfo = "x+y+name"
      )
    }

    if(!is.null(box.values)) {
      # Closed rectangle through the four corners given by rows 1 and 2 of
      # box.values (column 1 = x, column 2 = y)
      boxv = data.frame(
        X1 = c(box.values[1,1], box.values[2,1], box.values[2,1], box.values[1,1] ,box.values[1,1]),
        X2 = c(box.values[1,2], box.values[1,2], box.values[2,2], box.values[2,2], box.values[1,2])
      )
      this.max = max(abs(unlist(boxv)), this.max)
      enaplot$plot = plotly::add_trace(
        p = enaplot$plot,
        data = boxv,
        type = "scatter",
        x = ~X1, y = ~X2,
        mode = "lines",
        line = list(
          width = 1,
          color = colors[1],
          dash = "dash"
        ),
        # "legendgroup" = labels[1],
        showlegend = show.legend,
        name = box.label
      )
    }

    # Grow both axes (symmetrically, with 20% padding) when the new content
    # would not fit into the current range
    if(this.max*1.2 > max(enaplot$axes$y$range)) {
      this.max = this.max * 1.2
      enaplot$axes$x$range = c(-this.max, this.max)
      enaplot$axes$y$range = c(-this.max, this.max)
      enaplot$plot = plotly::layout(
        enaplot$plot,
        xaxis = enaplot$axes$x,
        yaxis = enaplot$axes$y
      );
    }
  #####
  # END: Plot
  ###

  return(enaplot);
}


###
#' @title ENA Rotate by regression
#'
#' @description This function allows the user to provide a regression formula for rotation on x and optionally on y.
#'    If a regression formula for y is not provided, svd is applied to the residual data deflated by x to get y coordinates.
#'    The regression formula uses ENA dimensions as dependent variables.
#'    The first predictor has to be two-group categorical, binary, or numerical.
#'
#' @param enaset An \code{\link{ENAset}}
#' @param params list of parameters, may include:
#'     x_var: Regression formula for x direction, such as "lm(formula=V ~ Condition + GameHalf + Condition : GameHalf)",
#'      where V always stands for the ENA points.
#'     y_var: Regression formula, similar to x_var, for y direction (optional).
#'
#' @export
#' @return \code{\link{ENARotationSet}}
ena.rotate.by.hena.regression = function(enaset, params) {
  # check arguments
  if ( !is.list(params) || is.null(params$x_var) ) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  x <- params$x_var;
  y <- params$y_var;
  points <- params$points;
  fullNames <- params$fullNames;

  if(is.null(fullNames)) {
    fullNames = FALSE;
  }

  # Points to rotate: explicit params$points first, then the set's
  # normed/centered points, otherwise the model's points for projection
  if(!is.null(points)) {
    p <- points
  }
  else  if (is.null(enaset$points.normed.centered)) {
    p <- as.matrix(enaset$model$points.for.projection);
  }
  else {
    p <- as.matrix(enaset$points.normed.centered);
  }

  #regress to get v1 using x
  # NOTE(review): V is presumably the response the user formula refers to;
  # how with.ena.matrix resolves it is not visible here, so the assignment and
  # the call below are kept exactly as they were.
  V <- p;

  v1_res <- with.ena.matrix(enaset$model$points.for.projection, {
    lm(formula(params$x_var));
  });
  # Row 2 of the coefficients: the first predictor (row 1 is the intercept)
  v1 <- v1_res$coefficients[2,]

  # make v1  a unit vector
  norm_v1 <- sqrt(sum(v1 * v1));
  if (norm_v1 != 0) {
    v1 <- v1 / norm_v1;
  }

  # name v1 vector
  if(is.na(all.vars(x)[2])) {
    xName <- names(v1)[1];
  }
  else {
    if(fullNames) {
      warning("FullName param is likely wrong.")
      xName <- parse(text = x)[[1]][["formula"]][[3]];
    }
    else {
      xName <- all.vars(x)[2];
    }
  }

  # Save v1
  R <- matrix(c(v1), ncol = 1);
  colnames(R) <- c(paste0(xName,"_reg"));

  #deflate matrix by x dimension
  A <- as.matrix(p)
  defA <- as.matrix(A) - as.matrix(A) %*% v1 %*% t(v1)

  #if y formula is given, regress by y formula
  if (!is.null(y)) {

    # regress to get v2 vector using formula y
    V <- defA;

    v2_res <- with.ena.matrix(enaset$model$points.for.projection, {
      lm(formula(params$y_var));
    });
    v2 <- v2_res$coefficients[2,]

    #make v2 a unit vector
    norm_v2 <- sqrt(sum(v2 * v2));
    if (norm_v2 != 0) {
      v2 <- v2 / norm_v2;
    }

    #name v2 vector
    if(is.na(all.vars(y)[2])) {
      yName <- names(v2)[1]
    }
    else {
      if(fullNames) {
        warning("FullName param is likely wrong.")
        yName <- parse(text = y)[[1]][["formula"]][[3]];
      }
      else {
        yName <- all.vars(y)[2]
      }
    }

    # save both v1 and v2
    R <- cbind(v1, v2);
    colnames(R) <- c(paste0(xName,"_reg"), paste0(yName,"_reg"));

    #deflate by v2
    defA <- as.matrix(defA) - as.matrix(defA) %*% v2 %*% t(v2);
  }

  # get svd for deflated points
  svd_result <- prcomp(defA, retx=FALSE, scale=FALSE, center=FALSE, tol=0);
  svd_v <- svd_result$rotation;

  # Merge rotation vectors: the regression vectors first, followed by as many
  # leading SVD vectors as are left once the regression vectors are counted.
  # seq_len() with drop = FALSE keeps this well-defined when nothing is left
  # (1:0 would select column 1, 1:-1 would be an error).
  vcount <- ncol(R);
  colNamesR <- colnames(R);
  n_svd <- max(ncol(svd_v) - vcount, 0);
  combined <- cbind(R, svd_v[, seq_len(n_svd), drop = FALSE]);
  colnames(combined) <- c(
    colNamesR,
    if (n_svd > 0) paste0("SVD", vcount + seq_len(n_svd))
  );

  #create rotation set
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = NULL
  )

  return(rotation_set);
}

#####
#'
#' @title Wrapper to generate an ENA model
#'
#' @description Generates an ENA model by constructing a dimensional reduction
#' of adjacency (co-occurrence) vectors as defined by the supplied
#' conversations, units, and codes.
#'
#' @details This function generates an ena.set object given a data.frame, units,
#' conversations, and codes. After accumulating the adjacency (co-occurrence)
#' vectors, computes a dimensional reduction (projection), and calculates node
#' positions in the projected ENA space. Returns location of the units in the
#' projected space, as well as locations for node positions, and normalized
#' adjacency (co-occurrence) vectors to construct network graphs. Includes options
#' for returning statistical tests between groups of units.
#'
#' @param data data.frame with containing metadata and coded columns
#' @param codes vector, numeric or character, of columns with codes
#' @param units vector, numeric or character, of columns representing units
#' @param conversation  vector, numeric or character, of columns to segment conversations by
#' @param metadata  vector, numeric or character, of columns with additional meta information for units
#' @param model character, the ENA model to construct: \code{EndPoint} (default) produces a single adjacency vector per unit summing co-occurrences across all lines; \code{AccumulatedTrajectory} produces one adjacency vector per unit per conversation, where each successive conversation accumulates prior ones; \code{SeparateTrajectory} produces one adjacency vector per unit per conversation, each modeled independently
#' @param weight.by "binary" is default, can supply a function to call (e.g. sum)
#' @param window MovingStanzaWindow (default) or Conversation
#' @param window.size.back integer, number of lines back from each line to include in the stanza window (default: 1)
#' @param window.size.forward integer, number of lines forward from each line to include in the stanza window (default: 0). Set to model bidirectional co-occurrence within a window.
#' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
#' @param groupVar vector, character, of column name containing group identifiers.
#' If column contains at least two unique values, will generate model using a means rotation (a dimensional reduction maximizing the variance between the means of the two groups)
#' @param groups vector, character, of values of groupVar column used for means rotation or statistical tests
#' @param runTest logical, TRUE will run a Student's t-Test and a Wilcoxon test for groups defined by the groups argument
#' @param ... Additional parameters passed to model generation, including \code{mask} (an optional binary matrix of size ncol(codes) x ncol(codes) where 0 suppresses co-occurrence modeling between a pair of codes; see \code{\link{ena.accumulate.data}})
#'
#'
#' @return ena.set object
#####
ena.set.creator = function(
  data,
  codes,
  units,
  conversation,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  include.meta = TRUE,
  groupVar = NULL,
  groups = NULL,
  runTest = FALSE,
  ...
) {
  data <- data.table::data.table(data)

  model <- match.arg(model)
  window <- match.arg(window)

  # Accumulate the co-occurrence data; `..name` selects the columns listed in
  # the variable `name` (data.table syntax)
  accum <- ena.accumulate.data(
    units = data[, ..units, drop = FALSE],
    conversation = data[, ..conversation, drop = FALSE],
    metadata = data[, ..metadata, drop = FALSE],
    codes = data[, ..codes, drop = FALSE],
    window = window,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    include.meta = include.meta,
    ...
  )

  # Keep the raw input (plus a merged unit identifier) on the accumulated model
  accum$model$raw.input <- data.table::as.data.table(data)
  accum$model$raw.input$ENA_UNIT <- merge_columns_c(accum$model$raw.input, units)

  group1 <- NULL
  group2 <- NULL
  group1.rows <- NULL
  group2.rows <- NULL

  # Arguments for ena.make.set: everything passed through `...` plus the
  # accumulated data
  set_params <- list(...)
  set_params$enadata <- accum

  ### Decide which two groups (if any) drive the means rotation
  if (is.null(groupVar)) {
    # no group column: plain set, no test possible
    if (runTest == TRUE) {
      warning("Group variable and groups not specified. Unable to run test")
    }
  }
  else if (is.null(groups)) {
    # group column given, but no group values: fall back to its first two
    # unique values
    unique.groups <- unique(as.character(data[[groupVar]]))

    if (length(unique.groups) == 1) {
      warning("Group variable only contains one unique value. ENAset has been created without means rotation")

      if (runTest == TRUE) {
        warning("Multiple groups not specified. Unable to run test")
      }
    }
    else {
      group1 <- unique.groups[1]
      group2 <- unique.groups[2]

      message(paste0("No groups specified. Defaulting to means rotation using first two unique group values of group variable: ",group1," and ",group2))

      if (runTest == TRUE) {
        warning(paste0("No groups specified. Running test on the first two unique group values of the group variable: ",group1," and ",group2))
      }
    }
  }
  else if (length(groups) == 1) {
    message("Only one group value specified. ENAset has been created without means rotation")

    if (runTest == TRUE) {
      warning("Multiple groups not specified. Unable to run test")
    }
  }
  else {
    group1 <- groups[1]
    group2 <- groups[2]

    if (length(groups) > 2) {
      warning(paste0("Only two groups are allowed for means rotation. ENAset has been created using a means rotation on the first two groups given: ",group1," and ",group2))
    }

    # Report every missing group value in ONE message; pasting the vector
    # without `collapse` produced one message per value, which stop() then
    # glued together without separators
    groups.missing <- groups[!groups %in% data[[groupVar]]]
    if (length(groups.missing) > 0) {
      stop(paste0(
        "Group column does not contain supplied group value(s): ",
        paste(groups.missing, collapse = ", ")
      ))
    }

    if (runTest == TRUE && length(groups) > 2) {
      warning(paste0("More than two groups specified. Running test on the first two groups: ",group1," and ",group2))
    }
  }

  # With two groups selected, rotate by the difference of their means and
  # remember which units belong to each group for the optional tests
  if (!is.null(c(group1, group2))) {
    group1.rows <- accum$meta.data[[groupVar]] == group1
    group2.rows <- accum$meta.data[[groupVar]] == group2

    set_params$rotation.by <- ena.rotate.by.mean
    set_params$rotation.params <- list(group1.rows, group2.rows)
  }

  set <- do.call(ena.make.set, set_params)

  if (runTest == TRUE && !is.null(c(group1.rows, group2.rows))) {
    # Compare the two groups on each of the first two dimensions
    set.points <- as.matrix(set$points)
    group1.dim1 <- set.points[group1.rows, 1]
    group2.dim1 <- set.points[group2.rows, 1]
    group1.dim2 <- set.points[group1.rows, 2]
    group2.dim2 <- set.points[group2.rows, 2]

    set$tests <- list(
      wilcox.test = list(
        test.dim1 = wilcox.test(x = group1.dim1, y = group2.dim1),
        test.dim2 = wilcox.test(x = group1.dim2, y = group2.dim2)
      ),
      t.test = list(
        test.dim1 = t.test(x = group1.dim1, y = group2.dim1),
        test.dim2 = t.test(x = group1.dim2, y = group2.dim2)
      )
    )
  } else {
    set$tests <- NULL
  }

  return(set)
}

#' Correlation between distances in two ENA spaces
#'
#' @description
#' Calculates the Pearson correlation between the pairwise Euclidean distances
#' of points in two ENA spaces (\code{A} and \code{B}). For smaller datasets,
#' it computes the exact correlation. For larger datasets, it estimates the
#' correlation using a sampled subset of pairs.
#'
#' @details
#' With \code{m} rows there are \code{choose(m, 2)} unique pairs. If that
#' number does not exceed \code{max_sample_size}, all pairs are used.
#' Otherwise \code{max_sample_size} index pairs are drawn at random (with
#' replacement), pairs of a point with itself are discarded, and the
#' correlation is computed on the remaining pairs; this result depends on the
#' state of the random number generator.
#'
#' @param A A matrix or data frame representing the first ENA space (rows as points).
#' @param B A matrix or data frame representing the second ENA space (must have the same number of rows as A).
#' @param max_sample_size Numeric. The maximum number of pairwise distances to compute.
#' If the total possible pairs exceeds this value, sampling is used. Default is 100,000.
#'
#' @return A numeric value representing the Pearson correlation.
#' @importFrom stats dist cor
#' @export
ena_space_dist_corr <- function(A, B, max_sample_size = 100000) {
  m <- nrow(A)

  # is.null(nrow(B)) is checked explicitly: for an object without dimensions
  # `nrow(B) != m` has length zero and cannot be used inside `||`
  if (is.null(m) || m == 0 || is.null(nrow(B)) || nrow(B) != m) {
    stop("The spaces must have the same non-zero number of rows.")
  }

  # Work on plain matrices so that row indexing and arithmetic behave the same
  # for matrix and data frame input
  A <- as.matrix(A)
  B <- as.matrix(B)

  # Calculate total unique pairs m(m-1)/2
  total_possible_pairs <- choose(m, 2)

  if (total_possible_pairs <= max_sample_size) {
    # CASE 1: Small m - exact calculation over all unique pairs
    d_A <- as.vector(dist(A))
    d_B <- as.vector(dist(B))
  } else {
    # CASE 2: Large m - sample index pairs with replacement ...
    idx1 <- sample.int(m, max_sample_size, replace = TRUE)
    idx2 <- sample.int(m, max_sample_size, replace = TRUE)

    # ... and drop pairs of a point with itself
    keep <- idx1 != idx2
    idx1 <- idx1[keep]
    idx2 <- idx2[keep]

    # Vectorized Euclidean Distance: sqrt(sum((x-y)^2))
    d_A <- sqrt(rowSums((A[idx1, , drop = FALSE] - A[idx2, , drop = FALSE])^2))
    d_B <- sqrt(rowSums((B[idx1, , drop = FALSE] - B[idx2, , drop = FALSE])^2))
  }

  cor(d_A, d_B, method = "pearson")
}

#' Calculate the Peak Ground Diversity Window
#'
#' This function acts as a wrapper to identify the sliding window size that yields
#' the highest (peak) ground diversity/entropy for a given dataset.
#'
#' @param data A data.frame or data.table containing the conversation data.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return An integer representing the window size that maximizes ground entropy.
#' @export
#'
#' @examples
#' \dontrun{
#' peak_w <- ena.gd.window(data = my_data,
#'                         codeNames = c("CodeA", "CodeB"),
#'                         conversation_cols = "ConversationID",
#'                         max_window = 15)
#' }
ena.gd.window <- function(data, codeNames, conversation_cols, max_window = 20) {
  # Run the full ground-diversity analysis, then keep only the peak window
  diversity <- ena.ground.diversity(
    data = data,
    codeNames = codeNames,
    conversation_cols = conversation_cols,
    max_window = max_window
  )
  diversity$peaks$peak_window
}

#' Ground Diversity (Entropy) Curves and Peaks
#'
#' Measures how diverse the "ground types" (combinations of active codes) are
#' for every sliding-window size from 1 up to \code{max_window}. Ground types
#' are counted with \code{ground.type.counts} and turned into normalized,
#' mean-aggregated entropy curves with \code{ground.entropy.curves}.
#'
#' @param data A data.frame or data.table containing the conversation data.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return A list containing three elements:
#' \describe{
#'   \item{curves}{A data.table containing aggregated entropy curves across windows.}
#'   \item{peaks}{A data.table highlighting the window size(s) where entropy peaked.}
#'   \item{conversation_curves}{A data.table showing calculated entropy per individual conversation per window.}
#' }
#' @export
#'
#' @examples
#' \dontrun{
#' gd_results <- ena.ground.diversity(data = my_data,
#'                                    codeNames = c("CodeA", "CodeB"),
#'                                    conversation_cols = "ConversationID")
#' }
ena.ground.diversity <- function(data, codeNames, conversation_cols, max_window = 20) {
  # Step 1: frequency of every ground type, per conversation and window size.
  type_counts <- ground.type.counts(
    data = data,
    codeNames = codeNames,
    conversation_cols = conversation_cols,
    max_window = max_window
  )

  # Step 2: entropy per window; the number of codes fixes the normalization.
  ground.entropy.curves(
    counts = type_counts,
    conversation_cols = conversation_cols,
    num_codes = length(codeNames),
    methods = "mean",
    normalize = TRUE
  )
}

#' Calculate Entropy Curves from Ground Type Counts
#'
#' Internal helper function that takes ground type frequencies and computes Shannon entropy
#' across different aggregation methods (mean, weighted, or pooled) and window sizes.
#'
#' @param counts A data.table generated by \code{\link{ground.type.counts}}; it must
#'   provide the columns \code{count}, \code{n_obs}, \code{window} and \code{ground_type}
#'   in addition to the conversation (and grouping) columns.
#' @param conversation_cols A character vector of column names defining unique conversations.
#' @param num_codes An integer representing the total number of unique codes (used for normalization base \eqn{2^C}).
#' @param group_cols A character vector of columns to group by (e.g., experimental conditions).
#'   Default is NULL, in which case every row is placed in a single group stored in a
#'   column named \code{.group}.
#' @param methods A character vector specifying which aggregation methods to calculate.
#'   Options include "mean", "weighted", and "pooled". Default is all three. Entries that
#'   match none of the options are dropped; an error is raised if nothing matches.
#' @param normalize Logical; if TRUE (default), entropy is divided by
#'   \eqn{\log(2^C)} so that it lies on a \eqn{[0, 1]} scale.
#'
#' @return A list containing:
#' \describe{
#'   \item{curves}{Aggregated entropy metrics across windows and methods.}
#'   \item{peaks}{The optimal window size that maximized entropy for each method.}
#'   \item{conversation_curves}{Entropy calculations mapped back to individual conversations.}
#' }
#' @keywords internal
ground.entropy.curves <- function(
    counts,
    conversation_cols,
    num_codes,
    group_cols = NULL,
    methods = c("mean", "weighted", "pooled"),
    normalize = TRUE
) {
  # Fail early with a readable message when no recognised method is requested;
  # otherwise the peak search below would run on an empty table.
  methods <- match.arg(methods, several.ok = TRUE)

  # as.data.table() yields a separate table, so the `:=` below does not touch
  # the caller's `counts`.
  dt <- data.table::as.data.table(counts)

  if (is.null(group_cols)) {
    dt[, .group := "all"]
    group_cols <- ".group"
  }

  # log(2^num_codes) written as num_codes * log(2): mathematically identical,
  # but it cannot overflow to Inf when num_codes is large.
  log_base <- if (normalize) num_codes * log(2) else 1

  # Shannon entropy of a vector of frequencies. Zero frequencies contribute
  # nothing to the sum and are dropped so that 0 * log(0) never yields NaN.
  shannon <- function(freq) {
    freq <- freq[freq > 0]
    p <- freq / sum(freq)
    -sum(p * log(p)) / log_base
  }

  conv_entropy <- dt[
    ,
    .(
      entropy = shannon(count),
      n_unique = .N,
      n_obs = unique(n_obs)
    ),
    by = c(conversation_cols, group_cols, "window")
  ]

  results <- list()

  # --- Mean: every conversation counts equally ---
  if ("mean" %in% methods) {
    tmp <- conv_entropy[
      ,
      .(
        entropy = mean(entropy),
        n_unique = mean(n_unique),
        n_conversations = .N
      ),
      by = c(group_cols, "window")
    ]
    tmp[, method := "mean"]
    results[[length(results) + 1]] <- tmp
  }

  # --- Weighted: conversations weighted by their number of observations ---
  if ("weighted" %in% methods) {
    tmp <- conv_entropy[
      ,
      .(
        entropy = weighted.mean(entropy, n_obs),
        n_unique = weighted.mean(n_unique, n_obs),
        n_conversations = .N
      ),
      by = c(group_cols, "window")
    ]
    tmp[, method := "weighted"]
    results[[length(results) + 1]] <- tmp
  }

  # --- Pooled: counts summed across conversations before computing entropy ---
  if ("pooled" %in% methods) {
    tmp <- dt[
      ,
      .(count = sum(count)),
      by = c(group_cols, "window", "ground_type")
    ][
      ,
      .(
        entropy = shannon(count),
        n_unique = .N
      ),
      by = c(group_cols, "window")
    ]
    tmp[, method := "pooled"]
    results[[length(results) + 1]] <- tmp
  }

  # fill = TRUE because the pooled table has no `n_conversations` column.
  curves <- data.table::rbindlist(results, fill = TRUE)

  # For each group and method keep the first window with maximal entropy.
  peaks <- curves[
    ,
    .SD[which.max(entropy)][1],
    by = c(group_cols, "method")
  ][
    ,
    .(
      peak_window = window,
      peak_entropy = entropy,
      method = method
    ),
    by = group_cols
  ]

  list(
    curves = curves,
    peaks = peaks,
    conversation_curves = conv_entropy
  )
}

#' Count Binary-Encoded Ground Types per Window Size
#'
#' For every window size from 1 to \code{max_window}, marks each code as active on
#' a row when it was active anywhere in the trailing window (within the same
#' conversation), encodes the resulting 0/1 pattern as a single number
#' ("ground type"), and tallies how often each ground type occurs per conversation.
#'
#' @param data A data.frame or data.table.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return A data.table with the conversation columns plus \code{ground_type},
#'   \code{count}, \code{window} and \code{n_obs} (total rows of the conversation),
#'   stacked over all window sizes.
#' @keywords internal
ground.type.counts <- function(
    data,
    codeNames,
    conversation_cols,
    max_window = 20
) {
  stopifnot(all(codeNames %in% names(data)))
  stopifnot(all(conversation_cols %in% names(data)))

  source_dt <- data.table::as.data.table(data)

  # The bit-weighted ground-type id is only meaningful for 0/1 flags, so any
  # other value (including NA) is rejected up front.
  observed <- unique(as.vector(as.matrix(source_dt[, codeNames, with = FALSE])))
  if (!all(observed %in% c(0, 1))) {
    stop("All codeNames columns must be binary (0/1); found non-binary values.")
  }

  # Code k contributes 2^(k - 1), giving each 0/1 pattern a distinct id.
  bit_weights <- 2^(seq_along(codeNames) - 1)

  # "Was the code active anywhere in the trailing window?" For a 0/1 column
  # this is frollsum(...) > 0, which stays in compiled code. The leading
  # positions where the window is not yet full come back as NA and are filled
  # with the running maximum from the start of the conversation.
  any_active_in_window <- function(flags, width) {
    windowed <- as.numeric(
      data.table::frollsum(flags, width, align = "right") > 0
    )
    warmup <- which(is.na(windowed))
    if (length(warmup) > 0) {
      windowed[warmup] <- cummax(flags[warmup])
    }
    windowed
  }

  # Ground-type tallies for one window size.
  count_for_window <- function(width) {
    rolled_dt <- source_dt[, c(conversation_cols, codeNames), with = FALSE]

    rolled_dt[
      ,
      (codeNames) := lapply(.SD, any_active_in_window, width = width),
      by = conversation_cols,
      .SDcols = codeNames
    ]

    code_matrix <- as.matrix(rolled_dt[, codeNames, with = FALSE])
    rolled_dt[, ground_type := as.vector(code_matrix %*% bit_weights)]

    tallies <- rolled_dt[
      ,
      .(count = .N),
      by = c(conversation_cols, "ground_type")
    ]
    tallies[, window := width]
    tallies[, n_obs := sum(count), by = conversation_cols]

    tallies
  }

  data.table::rbindlist(
    lapply(seq_len(max_window), count_for_window),
    use.names = TRUE
  )
}

# Accumulate co-occurrence (adjacency) vectors for an ENAdata object.
#
# Reads the raw rows and the accumulation settings (units, conversations,
# window size, weighting) from `enadata`, computes a co-occurrence vector for
# every row, sums those vectors per unit (or per unit and trajectory step),
# and stores the results back on `enadata`.
#
# @param enadata Object exposing `$raw`, `$codes`, `$model` and a `$get()`
#   accessor for the accumulation settings; `ENAdata`'s private `loadFile()`
#   passes the ENAdata instance itself.
# @return `enadata`, with `adjacency.matrix`, `accumulated.adjacency.vectors`
#   and `adjacency.vectors` set, plus `unit.names` (EndPoint model) or
#   `trajectories$step` (any other model).
accumulate.data <- function(enadata) {
  dfDT <- enadata$raw;

  units.used <- enadata$get("units.used")
  units.by <- enadata$get("units.by")
  trajectory.by <- enadata$get("trajectory.by")
  codes <- enadata$codes

  # Codes may arrive as a data.frame of code columns; only the names are used.
  if (is.data.frame(codes)) {
    codes <- colnames(codes);
  }

  conversations.by <- enadata$get("conversations.by")
  window <- enadata$get("window.size")
  # binaryStanzas <- F
  # NOTE(review): units.exclude is read here but not referenced again in this
  # function.
  units.exclude <- enadata$get("units.exclude")

  # Without explicit trajectory columns, steps follow the conversation columns.
  if(is.null(trajectory.by)) {
    trajectory.by = conversations.by
  }

  ### Binary weighting is used only when weight.by is exactly "binary";
  ### any other value (e.g. a function) turns it off.
  binary <- T;
  if (!identical(enadata$get("weight.by"), "binary")) {
    binary <- F
  }
  else {
    binary <- T
  }

  ### We need data
  if (is.null(dfDT) || nrow(dfDT) < 1) {
    stop("The provided data is NULL")
  }

  ###
  # We need a data.table, it's worth it.
  ###
  if(!data.table::is.data.table(dfDT)) {
    dfDT <- data.table::as.data.table(dfDT)
  }

  ###
  # Make a copy of the data for safe usage
  ###
  dfDT_codes <- data.table::copy(dfDT)

  ###
  # Create a column representing the ENA_UNIT as defined
  # by the `units.by` parameter. The same values are also written back to
  # enadata$raw.
  ###
  if(!"ENA_UNIT" %in% colnames(dfDT_codes)) {
    dfDT_codes$ENA_UNIT <- enadata$raw$ENA_UNIT <- merge_columns_c(
      dfDT_codes,
      cols = units.by, sep = "::"
    )
  }

  ##
  # String vector of code names representing the names of the co-occurrences.
  # adjacency.length simplifies to vL * (vL - 1) / 2, the number of unordered
  # pairs of codes.
  ##
  vL <- length(codes);
  adjacency.length <- ( (vL * (vL + 1)) / 2) - vL ;
  codedTriNames <- paste("adjacency.code",rep(1:adjacency.length), sep=".");

  initial_cols <- c(units.by, codes)
  just_codes <- c(codes)

  ##
  # Accumulated windows appended to the end of each row
  #
  # FIXME: Don't append on the results to the initial data.table,
  #        keep a separate to lookup the results for the co-occurred
  #        values later on.
  ##
  # Case 1: window of one line back and none forward, so each row is converted
  # on its own.
  if (window$back == 1 && window$forward == 0) {
    dfDT.co.occurrences <- dfDT_codes[,{
        ocs <- data.table::as.data.table(
                rows_to_co_occurrences(
                  .SD[,.SD,.SDcols=codes, with=T],
                  binary = binary
                )
              );

        # Return value from data.table back to dfDT.co.occurrences
        data.table::data.table(.SD, ocs)
      },
      .SDcols = c(codes, conversations.by, trajectory.by),
      with = T
    ]

    ### Generate the ENA_UNIT column
    dfDT.co.occurrences$ENA_UNIT <- dfDT_codes$ENA_UNIT

    ### Keep original columns used for units
    dfDT.co.occurrences[, (units.by) := dfDT_codes[, .SD, .SDcols = units.by]]
  }
  # Case 2: the window is the whole conversation.
  else if (window$back == "Conversation") {
    ###
    # First sum all lines by conversation and unit to get vectors of codes
    # occurring in the whole conversation for each unit
    ###
    dfDT.conv.sum <- dfDT_codes[,
      lapply(.SD, sum), by = c(unique(conversations.by)),
      .SDcols = c(codes),
      with = T
    ]

    ###
    # Convert each unit's conversation sums into adjacency vectors
    ###
    # browser()
    dfDT.co.occurrences <- dfDT.conv.sum[,{
        ocs = data.table::as.data.table(rows_to_co_occurrences(.SD[,.SD,.SDcols=codes, with=T], binary = binary));
        data.table::data.table(.SD,ocs, ENA_UNIT=merge_columns_c(.SD, cols = units.by, sep="::"))
      },
      .SDcols=unique(c(codes, conversations.by, trajectory.by, units.by)),
      with=T
    ];
  }
  # Case 3: any other window; the windowed computation is delegated to
  # ref_window_df(), run separately for each conversation.
  else {
    ## parallel: https://stackoverflow.com/questions/14759905/data-table-and-parallel-computing
    ### Calculate occurrences of code within the provided window

    # if(enadata$function.params$in.par == T) {
    #   grainSize = ifelse(!is.null(enadata$function.params$grainSize), enadata$function.params$grainSize, 10);
    #   dfDT.co.occurrences = dfDT_codes[,
    #                            (codedTriNames) := try_one(
    #                              .SD[,.SD, .SDcols=just_codes],
    #                              window=window$back,
    #                              binary = binary,
    #                              grainSize = grainSize
    #                            ),
    #                            by=conversations.by,
    #                            .SDcols=initial_cols,
    #                            with=T
    #                          ];
    #
    # } else {
            # ,binaryStanzas = binaryStanzas
      dfDT.co.occurrences <- dfDT_codes[,
          (codedTriNames) := ref_window_df(
            .SD[, .SD, .SDcols = just_codes],
            windowSize = window$back,
            windowForward = window$forward,
            binary = binary
          ),
          by = conversations.by,
          .SDcols = initial_cols,
          with = T
      ];
    # }
  }
  # browser()

  # A user-supplied weighting function is applied to every adjacency column,
  # one row at a time (grouping by row number).
  if( is.function(enadata$get("weight.by")) ) {
    cols <- colnames(dfDT.co.occurrences)[
              grep("adjacency.code", colnames(dfDT.co.occurrences))
            ]
    dfDT.co.occurrences <- dfDT.co.occurrences[,
                                (cols) := lapply(
                                  .SD,
                                  enadata$get("weight.by")
                                ),
                                .SDcols = cols,
                                by = 1:nrow(dfDT.co.occurrences)
                           ]
  }


  ###
  # Convert the generic `V` names to corresponding `adjacency.vector` names.
  # The rename only happens when the number of `V` columns matches the number
  # of adjacency names.
  ###
    vCols <- grep("V\\d+", colnames(dfDT.co.occurrences))
    if(length(vCols) == length(codedTriNames)) {
      colnames(dfDT.co.occurrences)[vCols] <- codedTriNames
    }

  ##
  # If units aren't supplied, use all available
  ##
    if (is.null(units.used)) {
      units.used <- dfDT_codes$ENA_UNIT
    }


  ###
  # Trajectory Checks
  ###

  ## Not a Trajectory
  if (enadata$model == "EndPoint") {
    ###
    # Sum each unit found in dfDT.co.occurrences
    ###
    dfDT.summed.units <- dfDT.co.occurrences[ENA_UNIT %in% units.used,lapply(.SD,sum),by=units.by,.SDcols=codedTriNames]
    dfDT.summed.units$ENA_UNIT <- merge_columns_c(dfDT.summed.units, units.by, sep="::");

    enadata$unit.names <- dfDT.summed.units$ENA_UNIT;
  }
  ## Trajectory
  else {
    ## First sum all units within each Trajectory Group (trajectory.by).
    ## ENA_ROW_IDX records the group number (.GRP) of each unit/step pair.
    dfDT.summed.traj.by <- dfDT.co.occurrences[
      ENA_UNIT %in% units.used,
      {
        sums <- lapply(.SD, sum)
        data.frame(ENA_ROW_IDX = .GRP, sums); # Return value
      },
      by = c(units.by, trajectory.by),
      .SDcols = (codedTriNames)
    ];
    dfDT.summed.traj.by$ENA_UNIT <- merge_columns_c(
      dfDT.summed.traj.by, units.by, sep = "::"
    )
    dfDT.summed.traj.by$TRAJ_UNIT <- merge_columns_c(
      dfDT.summed.traj.by, trajectory.by, sep = "::"
    );

    enadata$trajectories$step <- dfDT.summed.traj.by$TRAJ_UNIT;

    # Accumulated
    # NOTE(review): ref_window_lag() with a window of .N (all rows of the unit)
    # presumably produces running totals over the steps — confirm against its
    # implementation.
    if (enadata$model == opts$TRAJ_TYPES[1]) {
      dfDT.summed.units <- dfDT.summed.traj.by[
        ENA_UNIT %in% unique(units.used), {
          cols <- colnames(.SD)
          ENA_UNIT <- paste(as.character(.BY), collapse = "::")
          TRAJ_UNIT <- .SD[, c(trajectory.by), with = F]
          inc_cols <- cols[! cols %in% c(trajectory.by, "ENA_ROW_IDX")]
          lag <- ref_window_lag(.SD[, .SD, .SDcols = inc_cols], .N)

          data.table::data.table(
            ENA_ROW_IDX,
            TRAJ_UNIT, lag, ENA_UNIT = ENA_UNIT
          )
        },
        by = c(units.by),
        .SDcols = c(codedTriNames, trajectory.by, "ENA_ROW_IDX")
      ]
      dfDT.summed.units$TRAJ_UNIT <- merge_columns_c(
        dfDT.summed.units, trajectory.by, sep = "::"
      )
    }
    # Non-accumulated: the per-step sums are used as they are
    else if (enadata$model == opts$TRAJ_TYPES[2]) {
      dfDT.summed.units <- dfDT.summed.traj.by;
    }
    else {
      stop("Unsupported Model type.");
    }

    dfDT.summed.units$ENA_UNIT <- merge_columns_c(
      dfDT.summed.units, units.by, sep = "::"
    )
  }
  ###
  # END: Trajectory Checks
  ###

  ###
  # Name the rows and columns accordingly
  ###
    colnames(dfDT.summed.units)[
      grep("V\\d+", colnames(dfDT.summed.units))
    ] <- codedTriNames

  ###
  # Set attributes
  #
  # TODO Most of this should be moved to a more prominent spot on ENAdata
  ###
    # codedRow1 / codedRow2 hold the two code names behind each adjacency
    # column; the + 1 suggests triIndices() returns zero-based indices
    # (TODO confirm).
    adjRows <- triIndices(length(codes)) + 1
    codedRow1 <- codes[adjRows[1, ]]
    codedRow2 <- codes[adjRows[2, ]]
    attr(dfDT.summed.units, "adjacency.matrix") <- rbind(codedRow1, codedRow2)
    attr(dfDT.summed.units, "adjacency.codes") <- codedTriNames
    attr(dfDT.summed.units, opts$UNIT_NAMES)  <- dfDT.summed.units[,
        .SD, with = T, .SDcols = units.by]

    # Row-level vectors go to accumulated.adjacency.vectors; the per-unit sums
    # go to adjacency.vectors.
    enadata$adjacency.matrix <- rbind(codedRow1, codedRow2)
    enadata$accumulated.adjacency.vectors <- dfDT.co.occurrences
    enadata$adjacency.vectors <- dfDT.summed.units
  ###
  # END: Set attributes
  ###

  return(enadata);
}

####
#' ENAdata R6class
#'
#' Holds the raw input, the accumulation settings and the accumulated
#' co-occurrence (adjacency) vectors of an ENA model.
#'
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export
#'
#' @field raw A data.table copy of the input data, with an added ENA_UNIT column built from the `units.by` columns
#' @field adjacency.vectors A data.table of adjacency (co-occurrence) vectors summed per unit (per unit and trajectory step for trajectory models), with the mask applied
#' @field accumulated.adjacency.vectors A data.table of adjacency (co-occurrence) vectors by row, as produced during accumulation
#' @field model The type of ENA model: EndPoint, Accumulated Trajectory, or Separate Trajectory
#' @field units A data frame of columns that were combined to make the unique units. Includes column for trajectory selections. (unique)
#' @field unit.names A vector of unique unit values
#' @field metadata A data frame of unique metadata for each unit
#' @field trajectories A list: units - data frame, for a given row tells which trajectory it's a part; step - data frame, where along the trajectory a row sits
#'
#' @field adjacency.matrix A two-row character matrix naming, for each adjacency column, the pair of codes it represents
#' @field adjacency.vectors.raw The summed adjacency vectors as they were before the mask was applied
#' @field codes A vector of code names
#' @field function.call The string representation of function called and parameters provided
#' @field function.params A list of all parameters sent to function call
####
ENAdata <- R6::R6Class("ENAdata", public = list(

  #' Construct ENAdata
  #'
  #' Stores the settings only; nothing is read or accumulated until
  #' `process()` is called.
  #'
  #' @param file A data.table, a data.frame, or a file path readable by `read.csv`
  #' @param units TBD
  #' @param units.used Unit names (ENA_UNIT values) to keep; NULL keeps all units
  #' @param units.by Names of the columns that are merged (with "::") into the unit identifier
  #' @param conversations.by Names of the columns that define a conversation
  #' @param codes A character vector of code column names, or a data.frame whose column names are used
  #' @param model TBD
  #' @param weight.by Either "binary" (default) or a function applied to the accumulated adjacency columns
  #' @param window.size.back Stored as the `back` element of the window size
  #' @param window.size.forward Stored as the `forward` element of the window size
  #' @param mask Optional code-by-code matrix whose upper triangle multiplies the adjacency vectors; NULL means a matrix of ones
  #' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
  #' @param ... Additional named values; `in.par` and `grainSize` are recorded in `function.params` when supplied
  #'
  #' @return The initialized ENAdata object
  initialize = function(
    file,
    units = NULL,
    units.used = NULL,
    units.by = NULL,
    conversations.by = NULL,
    codes = NULL,
    model = NULL,
    weight.by = "binary",
    window.size.back = 1,
    window.size.forward = 0,
    mask = NULL,
    include.meta = TRUE,
    ...
  ) {
    args <- list(...)
    self$function.call <- sys.call(-1)
    self$function.params <- list()

    private$file <- file
    self$units <- units
    private$units.used <- units.used
    private$units.by <- units.by
    private$conversations.by <- conversations.by
    self$codes <- codes

    if (is.data.frame(self$codes)) {
      self$codes <- colnames(self$codes)
    }

    private$weight.by <- weight.by
    private$window.size <- list(
      "back" = window.size.back,
      "forward" = window.size.forward
    )

    # Record the parameters of this call. Formal arguments are looked up in
    # this call's frame only (inherits = FALSE): an inheriting lookup would
    # also pick up unrelated objects of the same name, e.g. a global
    # `grainSize`. Anything that is not a formal argument is taken from `...`.
    call_frame <- environment()
    for (p in c("units", "units.used", "units.by",
                "conversations.by", "codes", "model", "weight.by",
                "window.size.back", "window.size.forward", "mask",
                "in.par", "grainSize", "include.meta")
    ) {
      if (exists(p, envir = call_frame, inherits = FALSE)) {
        self$function.params[[p]] <- get(p, envir = call_frame, inherits = FALSE)
      }
      else if (!is.null(args[[p]])) {
        self$function.params[[p]] <- args[[p]]
      }
    }

    self$model <- model

    private$mask <- mask

    return(self)
  },

  ## Public Properties ----
  model = NULL,
  raw = NULL,
  adjacency.vectors = NULL,
  adjacency.matrix = NULL,
  accumulated.adjacency.vectors = NULL,
  adjacency.vectors.raw = NULL,
  units = NULL,
  unit.names = NULL,
  metadata = NULL,
  trajectories = list(
    units = NULL,
    step = NULL
  ),
  codes = NULL,
  function.call = NULL,
  function.params = NULL,

  ## Public Functions ----

  #' Process accumulation
  #'
  #' Loads the input and runs the accumulation.
  #'
  #' @return ENAdata
  process = function() {
    private$loadFile()
  },

  #' Get property from object
  #'
  #' @param x character key of the private field to retrieve
  #' @return value of the private field named x; NULL when no such field exists
  get = function(x = "data") {
    return(private[[x]])
  },

  #' Add metadata
  #'
  #' Collects the columns of `raw` that are neither codes, unit columns nor
  #' conversation columns and that take a single value within every unit, and
  #' returns them with one row per unit.
  #'
  #' @param merge logical (default: FALSE); currently not used
  #'
  #' @return data.frame
  add.metadata = function(merge = FALSE) {
    raw_cols <- colnames(self$raw)
    reserved <- c(self$codes, private$units.by, private$conversations.by)

    # Logical negation instead of `-which(...)`: a negative empty index would
    # select no column at all when nothing is reserved.
    meta_avail <- raw_cols[!(raw_cols %in% reserved)]
    meta_avail <- meta_avail[meta_avail != "ENA_UNIT"]

    # Keep only columns with exactly one distinct value within every unit.
    meta_cols_to_use <- character(0)
    if (length(meta_avail) > 0) {
      distinct_per_unit <- self$raw[
        ,
        lapply(.SD, uniqueN),
        by = c(private$units.by),
        .SDcols = meta_avail
      ][, c(meta_avail), with = FALSE]

      is_constant <- vapply(
        distinct_per_unit,
        function(n) all(n == 1),
        logical(1)
      )
      meta_cols_to_use <- meta_avail[is_constant]
    }

    # One row per unit, restricted to units that were accumulated.
    raw.meta <- self$raw[!duplicated(ENA_UNIT)][
      ENA_UNIT %in% unique(
        self$accumulated.adjacency.vectors$ENA_UNIT
      ),
      c("ENA_UNIT", private$units.by, meta_cols_to_use),
      with = FALSE
    ]

    df_to_return <- raw.meta[ENA_UNIT %in% self$unit.names, ]

    return(df_to_return)
  }

  ),

  ### Private ----
  private = list(

    ## Private Properties ----
    file = NULL,
    window.size = NULL,
    units.used = NULL,
    units.by = NULL,
    conversations.by = NULL,
    weight.by = NULL,
    trajectory.by = NULL,
    mask = NULL,

    ## Private Functions ----

    # Read the input, accumulate adjacency vectors, apply the mask and attach
    # metadata. Returns the object itself.
    loadFile = function() {
      if (inherits(private$file, "data.table")) {
        df_DT <- private$file
      } else {
        if (inherits(private$file, "data.frame")) {
          df <- private$file
        } else {
          # Anything else is treated as a path to a CSV file.
          df <- read.csv(private$file)
        }
        df_DT <- data.table::as.data.table(df)
      }

      # Work on a copy so the caller's table is never modified by reference.
      self$raw <- data.table::copy(df_DT)
      self$raw$ENA_UNIT <- merge_columns_c(self$raw, private$units.by, "::")

      self <- accumulate.data(self)
      self$units <- self$adjacency.vectors[, private$units.by, with = FALSE]

      if (!self$model %in% c("AccumulatedTrajectory", "SeparateTrajectory")) {
        self$unit.names <- self$adjacency.vectors$ENA_UNIT
      }
      else {
        self$trajectories$units <- self$units
        conversation <- self$adjacency.vectors[
          , private$conversations.by, with = FALSE
        ]

        self$trajectories$step <- conversation
        self$units <- cbind(self$units, conversation)
        self$unit.names <- paste(
          self$adjacency.vectors$ENA_UNIT,
          self$adjacency.vectors$TRAJ_UNIT,
          sep = "::"
        )
      }

      # Snapshot taken before the mask is applied.
      self$adjacency.vectors.raw <- self$adjacency.vectors

      adjCols <- colnames(self$adjacency.vectors)[
        grep("adjacency.code", colnames(self$adjacency.vectors))
      ]

      # Default mask: every connection is kept with weight 1.
      if (is.null(private$mask)) {
        private$mask <- matrix(1,
                          nrow = length(self$codes),
                          ncol = length(self$codes),
                          dimnames = list(self$codes, self$codes))
      }

      # Multiply each adjacency column by its mask entry: every upper-triangle
      # value is repeated once per row so that it lines up with a whole column.
      self$adjacency.vectors[, c(adjCols)] <-
        self$adjacency.vectors[, c(adjCols), with = FALSE] *
          rep(
            private$mask[upper.tri(private$mask)],
            rep(nrow(self$adjacency.vectors), length(adjCols))
          )

      # if( is.function(private$weight.by) ) {
      #   cols <- colnames(self$adjacency.vectors)[
      #             grep("adjacency.code", colnames(self$adjacency.vectors))
      #           ]
      #   self$adjacency.vectors <- self$adjacency.vectors[,
      #                               lapply(
      #                                 .SD,
      #                                 private$weight.by
      #                               ),
      #                               .SDcols = cols,
      #                               by = 1:nrow(self$adjacency.vectors)
      #                             ]
      # }

      if (self$function.params$include.meta == TRUE) {
        self$metadata <- self$add.metadata(merge = FALSE)
      } else {
        self$metadata <- data.frame()
      }

      # Finally keep only the adjacency columns.
      self$adjacency.vectors <- self$adjacency.vectors[,
                                  grep("adjacency.code",
                                    colnames(self$adjacency.vectors)),
                                  with = FALSE
                                ]

      return(self)
    }
  )
)

#' Declare the roles of columns in a data.table
#'
#' Converts \code{x} with \code{as.qe.data} and then marks the given columns as
#' metadata, codes, units and horizon, in that order, by calling
#' \code{metadata()}, \code{codes()}, \code{units()} and \code{horizon()}.
#' A role whose argument is NULL is skipped; an argument that is not a
#' non-empty numeric or character vector triggers a warning instead.
#'
#' @param x A data.table. The data.table to be transformed.
#' @param metadata_cols A vector of column names or NULL. A vector specifying the columns for metadata transformations.
#' @param codes_cols A vector of column names or NULL. A vector specifying the columns for code transformations.
#' @param horizon_cols A vector of column names or NULL. A vector specifying the columns for horizon transformations.
#' @param units_cols A vector of column names or NULL. A vector specifying the columns for unit transformations.
#'
#' @return The modified data.table after applying the transformations, returned invisibly.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' dt <- define(dt, metadata_cols = c("a"), codes_cols = c("b"))
#' @export
define <- function(
  x,
  metadata_cols = find_meta_cols(x),
  codes_cols = find_binary_cols(x),
  horizon_cols = NULL,
  units_cols = NULL
) {
  x <- as.qe.data(x)

  # A usable column specification is a non-empty numeric or character vector.
  is_col_spec <- function(cols) {
    (is.numeric(cols) || is.character(cols)) && length(cols) > 0
  }

  # Call `role_fn(x = tbl, <col 1>, <col 2>, ...)`: one unnamed argument per
  # requested column.
  apply_role <- function(tbl, cols, role_fn) {
    do.call(role_fn, c(list(x = tbl), as.list(unname(cols))))
  }

  # The default column arguments are evaluated lazily, so each one sees `x`
  # as it is when its role is processed. The order below therefore matters.
  if (is_col_spec(metadata_cols)) {
    x <- apply_role(x, metadata_cols, metadata)
  } else if (!is.null(metadata_cols)) {
    warning(WARNINGS$null_metadata)
  }

  if (is_col_spec(codes_cols)) {
    x <- apply_role(x, codes_cols, codes)
  } else if (!is.null(codes_cols)) {
    warning(WARNINGS$null_codes)
  }

  if (is_col_spec(units_cols)) {
    x <- apply_role(x, units_cols, units)
  } else if (!is.null(units_cols)) {
    warning(WARNINGS$null_units)
  }

  if (is_col_spec(horizon_cols)) {
    x <- apply_role(x, horizon_cols, horizon)
  } else if (!is.null(horizon_cols)) {
    warning(WARNINGS$null_horizon)
  }

  invisible(x)
}

#' Reclassify specified columns as codes or list codes columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.code' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.code'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.code', or a character vector of column names already classified as 'qe.code'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.code'
#' dt <- codes(dt, "a", "b")
#' # List columns classified as 'qe.code'
#' code_columns <- codes(dt)
#' @export
codes <- function(x, ...) {
  x <- as.qe.data(x)

  if (...length() > 0) {
    # Forward the requested columns to reclassify(), adding the table and the
    # converter as named arguments.
    dot_args <- list(...)
    dot_args$x <- x
    dot_args$v <- as.qe.code
    return(do.call(reclassify, dot_args))
  }

  # vapply() always returns a logical vector; sapply() would return list()
  # for a table without columns, which is not a valid subscript.
  colnames(x)[vapply(x, is.qe.code, logical(1))]
}

#' Reclassify specified columns as metadata or list metadata columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.metadata' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.metadata'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.metadata', or a character vector of column names already classified as 'qe.metadata'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.metadata'
#' dt <- metadata(dt, "a", "b")
#' # List columns classified as 'qe.metadata'
#' metadata_columns <- metadata(dt)
#' @export
metadata <- function(x, ...) {
  x <- as.qe.data(x)

  if (...length() > 0) {
    # Forward the requested columns to reclassify(), adding the table and the
    # converter as named arguments.
    dot_args <- list(...)
    dot_args$x <- x
    dot_args$v <- as.qe.metadata
    return(do.call(reclassify, dot_args))
  }

  # vapply() always returns a logical vector; sapply() would return list()
  # for a table without columns, which is not a valid subscript.
  colnames(x)[vapply(x, is.qe.metadata, logical(1))]
}

#' Reclassify specified columns as units or list unit columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.unit' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.unit'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.unit', or a character vector of column names already classified as 'qe.unit'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.unit'
#' dt <- units(dt, "a", "b")
#' # List columns classified as 'qe.unit'
#' unit_columns <- units(dt)
#' @export
units <- function(x, ...) {
  x <- as.qe.data(x)

  if (...length() > 0) {
    # Forward the requested columns to reclassify(), adding the table and the
    # converter as named arguments.
    dot_args <- list(...)
    dot_args$x <- x
    dot_args$v <- as.qe.unit
    return(do.call(reclassify, dot_args))
  }

  # vapply() always returns a logical vector; sapply() would return list()
  # for a table without columns, which is not a valid subscript.
  colnames(x)[vapply(x, is.qe.unit, logical(1))]
}

#' Reclassify specified columns as horizon or list horizon columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.horizon' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.horizon'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.horizon', or a character vector of column names already classified as 'qe.horizon'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.horizon'
#' dt <- horizon(dt, "a", "b")
#' # List columns classified as 'qe.horizon'
#' horizon_columns <- horizon(dt)
#' @export
horizon <- function(x, ...) {
  x <- as.qe.data(x)

  if (...length() > 0) {
    # Forward the requested columns to reclassify(), adding the table and the
    # converter as named arguments.
    dot_args <- list(...)
    dot_args$x <- x
    dot_args$v <- as.qe.horizon
    return(do.call(reclassify, dot_args))
  }

  # vapply() always returns a logical vector; sapply() would return list()
  # for a table without columns, which is not a valid subscript.
  colnames(x)[vapply(x, is.qe.horizon, logical(1))]
}

# Binds the name `@.horizon` to the same function object as `horizon`.
# NOTE(review): the name has the shape of an S3 method for the `@` operator on
# objects of class "horizon" — confirm that this dispatch is the intent.
#' @export
'@.horizon' <- horizon

#' Convert selected columns of a data.table with a given function
#'
#' Each column named in \code{...} is passed through \code{v} and the result is
#' written back into \code{x} with \code{data.table::set}, i.e. by reference.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified.
#' @param v A function. The function to apply to each specified column for reclassification.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' dt <- reclassify(dt, as.qe.code, "a", "b")
#' @export
reclassify <- function(x, v, ...) {
  targets <- list(...)

  for (pos in seq_along(targets)) {
    column <- targets[[pos]]
    converted <- v(x[[column]])
    # set() replaces the column in place, without copying the table.
    data.table::set(x, j = column, value = converted)
  }

  x
}

##
#' @title Find conversations by unit
#'
#' @description Find rows of conversations by unit
#'
#' @details [TBD]
#'
#' @param set [TBD]
#' @param units [TBD]
#' @param units.by [TBD]
#' @param codes [TBD]
#' @param conversation.by [TBD]
#' @param window [TBD]
#' @param conversation.exclude [TBD]
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'               'Client.and.Consultant.Requests','Design.Reasoning',
#'               'Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("Condition","UserName")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre",
#'                         "CONFIDENCE.Post","C.Change")],
#'   codes = RS.data[,codeNames],
#'   model = "EndPoint",
#'   window.size.back = 4
#' );
#' set = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(accum$meta.data$Condition=="FirstGame",
#'                          accum$meta.data$Condition=="SecondGame")
#' );
#' ena.conversations(set = RS.data,
#'   units = c("FirstGame.steven z"), units.by=c("Condition","UserName"),
#'   conversation.by = c("Condition","GroupName"),
#'   codes=codeNames, window = 4
#' )
#'
#' @export
#' @return list containing row indices representing conversations
##
ena.conversations = function(set, units, units.by=NULL, codes=NULL, conversation.by = NULL, window = 4, conversation.exclude = c()) {
  # Finds, for the requested units, the coded rows and the window of rows in
  # the same conversation that precede each of them.
  #
  # Returns a list with:
  #   conversations - row indices of every conversation, named by its key
  #   unitConvs     - keys of the conversations containing coded unit rows
  #   allRows       - rows falling inside a window where every code occurred
  #   unitRows      - coded rows belonging to the requested units
  #   toRemove      - coded unit rows whose window lacked at least one code

  # rawData = data.table::copy(set$enadata$raw);
  # Without an explicit units.by, fall back to the value stored on the set;
  # that is only possible when `set` is an ena.set.
  if(is.null(units.by)) {
    if(!is(set, "ena.set")) {
      stop("If units.by is NULL, set must be an ena.set object")
    }
    units.by = set$`_function.params`$units.by;
  }
  # conversation.by = set$enadata$function.params$conversations.by;
  # window = set$enadata$function.params$window.size.back;
  # rawAcc = data.table::copy(set$enadata$accumulated.adjacency.vectors);
  # Row-level data: taken from the set when given an ena.set, otherwise `set`
  # itself is treated as the raw data and converted to a data.table.
  if(is(set, "ena.set")) {
    rawAcc2 = set$model$raw.input
  } else {
    rawAcc2 = data.table::data.table(set) #$enadata$raw);
  }

  # rawAcc$KEYCOL = merge_columns_c(rawAcc, conversation.by)
  # One key per row: the conversation.by columns joined with "::".
  rawAcc2$KEYCOL = merge_columns_c(rawAcc2, conversation.by, sep = "::")

  # conversationsTable = rawAcc[, paste(.I, collapse = ","), by = c(conversation.by)]
  # For each conversation, the row numbers (.I) collapsed into one
  # comma-separated string (column V1).
  conversationsTable2 = rawAcc2[, paste(.I, collapse = ","), by = c(conversation.by)]

  # rows = sapply(conversationsTable$V1, function(x) as.numeric(unlist(strsplit(x, split=","))),USE.NAMES = T)
  # Split those strings back into numeric row-index vectors, one per conversation.
  rows2 = lapply(conversationsTable2$V1, function(x) as.numeric(unlist(strsplit(x, split=","))))
  # browser()
  # names(rows) = merge_columns_c(conversationsTable,conversation.by); #unique(rawAcc[,KEYCOL])
  # Name each entry with the same "::" key used in KEYCOL so it can be looked up by key.
  names(rows2) = merge_columns_c(conversationsTable2,conversation.by, sep = "::"); #unique(rawAcc[,KEYCOL])

  # unitRows = merge_columns_c(rawAcc[,c(units.by),with=F], units.by)
  # Unit identifier of every row: the units.by columns joined with "::".
  unitRows2 = merge_columns_c(rawAcc2[,c(units.by),with=F], units.by, sep = "::")

  # adjCol = set$enadata$adjacency.matrix[1,] %in%  codes[1] & set$enadata$adjacency.matrix[2,] %in% codes[2]
  # adjColName = paste("adjacency.code.", which(adjCol), sep = "")
  # codedUnitRows = which(unitRows %in% units & rawAcc[[adjColName]] == 1)

  # TRUE for rows where the `codes` columns sum to more than zero.
  codedRows = rawAcc2[, rowSums(.SD), .SDcols = codes] > 0
  # Coded rows that belong to one of the requested units ...
  codedUnitRows2 = which(unitRows2 %in% units & codedRows)
  # ... minus any row that sits in an excluded conversation.
  codedUnitRows2 = codedUnitRows2[!(codedUnitRows2 %in% as.vector(unlist(rows2[conversation.exclude])))]
  # codedUnitRowConvs = rawAcc[codedUnitRows,KEYCOL];
  # Conversation key of each remaining coded unit row.
  codedUnitRowConvs2 = rawAcc2[codedUnitRows2,KEYCOL];

  codedUnitRowConvsAll = NULL;
  # NOTE(review): codedUnitRowConvsAll2 is assigned but never read below.
  codedUnitRowConvsAll2 = NULL;
  # Filled from inside the closure below via `<<-`.
  unitRowsNotCooccurred = c()
  if(length(codedUnitRows2) > 0) {
    codedUnitRowConvsAll = unique(unlist(sapply(X = 1:length(codedUnitRows2), simplify = F, FUN = function(x) {
      # Rows of the conversation this coded row belongs to, and the row's
      # position within that conversation.
      thisConvRows = rows2[[codedUnitRowConvs2[x]]]
      thisRowInConv = which(thisConvRows == codedUnitRows2[x])
      # An infinite window reaches back to the first row of the conversation,
      # so its length equals the row's position.
      winUse = ifelse(is.infinite(window), thisRowInConv, window)
      # Positions (thisRowInConv - winUse + 1) .. thisRowInConv; positions
      # before the start of the conversation (<= 0) are dropped when used.
      thisRowAndWindow = rep(thisRowInConv,winUse) - (winUse-1):0
      # TRUE only when every column in `codes` has a positive sum inside the window.
      coOccursFound = all(rawAcc2[thisConvRows[thisRowAndWindow[thisRowAndWindow > 0]], lapply(.SD, sum), .SDcols=codes] > 0)
      if(coOccursFound) {
        thisConvRows[thisRowAndWindow[thisRowAndWindow > 0]]
      } else {
        # Record the row in the enclosing function's accumulator.
        unitRowsNotCooccurred <<- c(unitRowsNotCooccurred, thisConvRows[thisRowInConv])
        # coOccursFound
        NULL
      }
    })))
  }
  return(list(
    conversations = as.list(rows2),
    unitConvs = unique(rawAcc2[codedUnitRows2,KEYCOL]),
    allRows = codedUnitRowConvsAll,
    unitRows = codedUnitRows2,
    toRemove = unitRowsNotCooccurred
  ));
}

# Pure-R replacements for functions that were previously compiled C++ exports
# (rENA/src/ena.cpp).  All math now lives in libqe; these wrappers preserve
# existing R-level function names so no call sites in R code need to change.
#
# Public API functions (exported) are marked @export.
# Internal functions (not exported) have no @export tag.

# ── public API ────────────────────────────────────────────────────────────────

#' Merge data frame columns
#'
#' Paste together multiple columns of a data frame or data.table with a
#' separator, used internally to construct unit-ID strings.
#'
#' @param df    A data.frame or data.table
#' @param cols  Character vector of column names to paste together
#' @param sep   Separator string (default "::")
#' @return A character vector of length \code{nrow(df)}
#' @export
merge_columns_c <- function(df, cols, sep = "::") {
  # Pull each requested column out as a vector, in the order given.
  column_values <- lapply(cols, function(name) df[[name]])
  # paste() receives the columns positionally plus the separator by name.
  paste_args <- append(column_values, list(sep = sep))
  do.call(paste, paste_args)
}

#' Row-wise L2 (Sphere) Normalization
#'
#' Normalizes each row of a numeric data frame or matrix to unit L2 norm.
#'
#' @param dfM A data.frame or matrix
#' @return A numeric matrix with each row normalized to unit L2 length
#' @export
fun_sphere_norm <- function(dfM) {
  # Coerce first so libqe always receives a matrix.
  network_matrix <- as.matrix(dfM)
  libqe::normalize_networks(network_matrix)
}

#' Row-wise Max-Norm Scaling
#'
#' Scales all rows of a numeric data frame by dividing by the largest row
#' L2 norm.
#'
#' @param dfM A data.frame or matrix
#' @return A numeric matrix scaled by the largest row L2 norm
#' @export
fun_skip_sphere_norm <- function(dfM) {
  # Coerce first so libqe always receives a matrix.
  network_matrix <- as.matrix(dfM)
  libqe::scale_networks(network_matrix)
}

#' Upper Triangle from Vector (numeric)
#'
#' Compute pairwise products v[j] * v[i] for all j < i.
#'
#' @param v Numeric vector or single-row matrix
#' @return Numeric row vector of pairwise products
#' @export
vector_to_ut <- function(v) {
  # as.matrix() turns a plain vector into a one-column matrix before the
  # values are handed to libqe.
  code_values <- as.matrix(v)
  libqe::code_connections(code_values)
}

#' Directed ENA node positions
#'
#' Least-squares node positions for directed ENA.
#'
#' @param line_weights Numeric matrix (units x connections)
#' @param points       Numeric matrix of rotated points (units x dims)
#' @param numDims      Number of dimensions
#' @return List with nodes, centroids, weights, points
#' @export
directed_node_positions <- function(line_weights, points, numDims) {
  # Thin wrapper: forwards all three arguments unchanged to the libqe
  # function of the same name and returns its result as-is.
  libqe::directed_node_positions(line_weights, points, numDims)
}

#' Directed node positions with ground+response combined
#'
#' Directed node positions with paired ground+response rows combined.
#'
#' @param line_weights Numeric matrix (units x connections)
#' @param points       Numeric matrix of rotated points (units x dims)
#' @param numDims      Number of dimensions
#' @return List with nodes, centroids, weights, points
#' @export
directed_node_positions_with_ground_response_added <- function(line_weights,
                                                                points,
                                                                numDims) {
  # Thin wrapper kept under the historical R-level name; the work is done by
  # libqe::directed_node_positions_combine_pairs(), called with the same
  # three arguments in the same order.
  libqe::directed_node_positions_combine_pairs(line_weights, points, numDims)
}

#' Calculate ENA correlations
#'
#' Pearson correlation with confidence interval between ENA points and
#' centroids.
#'
#' @param points     Numeric matrix (units x dims)
#' @param centroids  Numeric matrix (units x dims)
#' @param conf_level Confidence level (default 0.95)
#' @return Numeric matrix with columns: r, lower CI, upper CI
#' @export
ena_correlation <- function(points, centroids, conf_level = 0.95) {
  # Thin wrapper: arguments are passed through untouched (no as.matrix()
  # coercion here, unlike several sibling wrappers).
  libqe::ena_correlation(points, centroids, conf_level)
}

#' Confidence intervals around group mean positions
#'
#' Per-dimension t-based confidence intervals around the column means of a
#' numeric matrix of ENA points.
#'
#' @param points     Numeric matrix (units x dims)
#' @param conf_level Confidence level (default 0.95)
#' @return Numeric matrix (dims x 3): mean, lower CI, upper CI
#' @export
ena_mean_ci <- function(points, conf_level = 0.95) {
  # Coerce first so libqe always receives a matrix.
  point_matrix <- as.matrix(points)
  libqe::mean_ci(point_matrix, conf_level)
}

#' Outlier (Tukey-fence) intervals for group positions
#'
#' Per-dimension Tukey-fence intervals: Q1 - k*IQR to Q3 + k*IQR.
#'
#' @param points     Numeric matrix (units x dims)
#' @param iqr_factor IQR multiplier (default 1.5)
#' @return Numeric matrix (dims x 2): lower fence, upper fence
#' @export
ena_outlier_ci <- function(points, iqr_factor = 1.5) {
  # Coerce first so libqe always receives a matrix.
  point_matrix <- as.matrix(points)
  libqe::outlier_ci(point_matrix, iqr_factor)
}

#' Two-group comparison statistics for ENA points
#'
#' Per-dimension parametric (Welch t-test, Cohen's d) and non-parametric
#' (Wilcoxon rank-sum, rank-biserial r) statistics comparing two groups.
#'
#' @param g1 Numeric matrix of group 1 points (units x dims)
#' @param g2 Numeric matrix of group 2 points (units x dims)
#' @return List with: n1, n2, t, df, pvalue_t, cohens_d, means, sds,
#'   U, pvalue_u, effect_r, medians — each a vector/matrix of length dims
#' @export
ena_group_stats <- function(g1, g2) {
  # Both groups are coerced to matrices before the comparison is delegated.
  first_group <- as.matrix(g1)
  second_group <- as.matrix(g2)
  libqe::group_stats(first_group, second_group)
}

# ── internal (not exported) ───────────────────────────────────────────────────

# Per-row upper-triangle co-occurrence.
# @param df     A data.frame or matrix of code columns
# @param binary If TRUE, binarise non-zero products
rows_to_co_occurrences <- function(df, binary = TRUE) {
  # Coerce first so libqe always receives a matrix.
  code_matrix <- as.matrix(df)
  libqe::row_connections(code_matrix, binary)
}

# Stanza-window co-occurrence accumulation.
# @param df            A data.frame or matrix of code columns
# @param windowSize    Rows to look back (default 1; Inf = whole conversation)
# @param windowForward Rows to look forward (default 0)
# @param binary        Binarise co-occurrence counts (default TRUE)
ref_window_df <- function(df, windowSize = 1, windowForward = 0,
                           binary = TRUE) {
  largest_int <- .Machine$integer.max

  # Infinite or oversized window sizes are capped at the largest integer;
  # anything else is truncated to an integer.
  to_window_int <- function(size) {
    if (is.infinite(size) || size >= largest_int) {
      largest_int
    } else {
      as.integer(size)
    }
  }

  back <- to_window_int(windowSize)
  forward <- to_window_int(windowForward)

  accumulated <- libqe::accumulate_stanza(as.matrix(df), back, forward, binary)
  data.table::as.data.table(accumulated)
}

# Rolling backward window sum of code columns.
# @param df         A data.frame or matrix of code columns
# @param windowSize Number of rows to look back (default 0, treated as 1)
# @param binary     Unused; kept for API compatibility
ref_window_lag <- function(df, windowSize = 0, binary = TRUE) {
  # `binary` is accepted but not used; only the data and the window size
  # are forwarded.
  code_matrix <- as.matrix(df)
  libqe::rolling_window_sum(code_matrix, windowSize)
}

# Upper-triangle index pairs (0-based, +1 before use as R indices).
# @param len Side length of square code matrix
# @param row -1 = both rows, 0 = row indices, 1 = col indices
triIndices <- function(len, row = -1L) {
  # Thin wrapper: both arguments are forwarded unchanged to
  # libqe::connection_indices() and its result is returned as-is.
  libqe::connection_indices(len, row)
}

# Least-squares node positions (undirected ENA).
# @param adjMats  Numeric matrix of line weights (units x connections)
# @param t        Numeric matrix of rotated points (units x dims)
# @param numDims  Number of dimensions
lws_lsq_positions <- function(adjMats, t, numDims) {
  # Thin wrapper kept under the historical name; forwards the three
  # arguments unchanged to libqe::node_positions().
  libqe::node_positions(adjMats, t, numDims)
}

# String upper-triangle pairs: "A" "B" "C" -> "A & B" "A & C" "B & C".
# @param v Character vector of code names
svector_to_ut <- function(v) {
  # Thin wrapper: `v` is passed as-is to libqe::connection_names().
  libqe::connection_names(v)
}

# Center data by subtracting column means.
# @param values Numeric matrix or data.frame
center_data_c <- function(values) {
  # Coerce first so libqe always receives a matrix.
  value_matrix <- as.matrix(values)
  libqe::center_points(value_matrix)
}

##
#' @title Compute summary statistic for groupings of units using given method (typically, mean)
#'
#' @description Computes summary statistics for groupings (given as vector) of units in ena data using given method (typically, mean); computes summary statistic for point locations and edge weights for each grouping
#'
#' @export
#'
#' @param enaset An \code{\link{ENAset}} or a vector of values to group.
#' @param by A vector of values the same length as units. Uses rotated points for group positions and normed data to get the group edge weights
#' @param method A function that is used on grouped points. Default: mean().  If `enaset` is an ENAset, enaset$points.rotated will be groups using `mean` regardless of `method` provided
#' @param names A vector of names to use for the results. Default: unique(by)
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum
#' )
#'
#' means = ena.group(set, "Condition")
#'
#'
#' @return A list containing names, points, and edge weights for each of the unique groups formed by the function
##
ena.group <- function(
  enaset = NULL,
  by = NULL,
  method = mean,
  names = as.vector(unique(by))
) {
  # Summarises one table (`pts`) per group with `m`. Which of the three
  # branches runs depends on what `by` is:
  #   1. a logical vector     -> one summary over the selected rows
  #   2. column names of pts  -> one summary per combination of those columns
  #   3. anything else        -> treated as a per-row vector of group values
  # The default for `m` is evaluated lazily, so it sees `method` after the
  # character-to-function lookup further down.
  run.method = function(pts, m = method) {
    # Remember the leading class of the input so the result can be re-classed.
    to_matrix <- class(pts)[1];
    points.dt = pts;

    if(is.logical(by)) {
      # Keep only the rows flagged by `by`, then apply `m` to every column
      # picked out by find_dimension_cols() / find_code_cols().
      # NOTE(review): those helpers are defined elsewhere; presumably they
      # return logical column masks -- confirm.
      points.dt.means = points.dt[by, { lapply(.SD, m) }, .SDcols = find_dimension_cols(points.dt) | find_code_cols(points.dt)];
      # A group label is attached only when exactly one name is available.
      if(length(names) == 1) {
        points.dt.means[['ENA_GROUP_NAME']] <- as.ena.metadata(names)
      }
    }
    else if(all(by %in% colnames(pts))) {
      # Group by the named columns. Each summarised column is passed back
      # through the `as.<class>` function matching its original leading class.
      points.dt.means <- points.dt[,
                          {lapply(.SD, function(x) {
                            get(paste0("as.", class(x)[1]))(m(x))
                          })},
                          by = by,
                          .SDcols = find_dimension_cols(points.dt) | find_code_cols(points.dt)
                        ];
      # Group label = values of the `by` columns joined with ".".
      points.dt.means[, ENA_GROUP_NAME := do.call(paste, c(.SD, sep = ".")) , .SDcols = c(by)]
      # Drop the columns flagged by find_meta_cols(), then class the label
      # column as ena.metadata.
      points.dt.means <- points.dt.means[, !find_meta_cols(points.dt.means), with = F]
      set(points.dt.means, j = "ENA_GROUP_NAME", value = as.ena.metadata(points.dt.means[["ENA_GROUP_NAME"]]))
    }
    else {
      # Converter and names for the columns not flagged by find_meta_cols();
      # the converter is taken from the first such column's leading class.
      to_what <- get(paste0("as.", class(pts[[which(!find_meta_cols(pts))[1]]])[1]))
      to_cols <- names(which(!find_meta_cols(pts)))

      # `by` supplies one group value per row; aggregate() names its grouping
      # column "Group.1", which is re-classed and renamed to ENA_GROUP_NAME.
      points.dt.means = as.data.frame(aggregate(as.matrix(points.dt), by = list(by), FUN = m)) #"mean"))
      set(points.dt.means, j = "Group.1", value = as.ena.metadata(points.dt.means$Group.1))
      colnames(points.dt.means)[colnames(points.dt.means) == "Group.1"] <- "ENA_GROUP_NAME"
      # Restore the original column class on the aggregated columns.
      set(x = points.dt.means, j = to_cols, value = lapply(points.dt.means[, to_cols], to_what))
      points.dt.means <- as.data.table(points.dt.means)

      # agg.df[as.vector(unique(group.by)),]u
      # return (points.dt.means[as.vector(unique(by)),]);
      # Early return: keep only rows whose label is one of the values in `by`.
      return(as.ena.matrix(points.dt.means[which(points.dt.means$ENA_GROUP_NAME %in% unique(by)),], to_matrix))
    }

    return(as.ena.matrix(points.dt.means, to_matrix));
  }

  # Allow the summary function to be given by name.
  if(is.character(method)) {
    method = get(method)
  }

  # Old-style ENAset objects are converted with ena.set() first.
  if(is(enaset, "ENAset")) {
    enaset <- ena.set(enaset);
  }

  # For a set, summarise both the points and the line weights; any other
  # input is summarised directly.
  if (is(enaset, "ena.set")) {
    pts <- run.method(enaset$points)
    return(list(
      "names" = pts$ENA_GROUP_NAME,
      "points" = pts,
      "line.weights" = run.method(enaset$line.weights)
    ));
  }
  else {
    return(run.method(enaset))
  }
}

#' Re-class matrix as ena.matrix
#'
#' @param x data.frame, data.table, or matrix to extend
#' @param new.class Additional class to extend the matrix with, default: NULL
#'
#' @return \code{x}, with \code{new.class} and \code{"ena.matrix"} prepended to its class vector
#' @export
as.ena.matrix <- function(x, new.class = NULL) {
  # Optional extra class first, then "ena.matrix", then whatever x already had.
  extended_classes <- c(new.class, "ena.matrix", class(x))
  class(x) <- extended_classes
  x
}

#' Re-class matrix as ena.metadata
#'
#' @param x data.frame, data.table, or matrix to extend
#'
#' @return \code{x} (factors converted to character) with its class set to \code{c("ena.metadata", "character")}
#' @export
as.ena.metadata <- function(x) {
  # Factors are stored by their labels, not their integer codes.
  values <- if (is.factor(x)) as.character(x) else x
  # The class vector is replaced outright rather than extended with class(x);
  # extending it was noted to fail in $.ena.metadata.
  class(values) <- c("ena.metadata", "character")
  values
}
as.ena.code <- function(x) {
  # Factors are stored by their labels, not their integer codes.
  values <- if (is.factor(x)) as.character(x) else x
  # Prepend the marker class, keeping the existing classes after it.
  class(values) <- c("ena.code", class(values))
  values
}
as.ena.codes <- function(x) {
  # Factors are stored by their labels, not their integer codes.
  values <- if (is.factor(x)) as.character(x) else x
  # Prepend the marker class, keeping the existing classes after it.
  class(values) <- c("ena.codes", class(values))
  values
}
#' Re-class vector as ena.co.occurrence
#'
#' @param x Vector to re-class
#'
#' @return re-classed vector
#' @export
as.ena.co.occurrence <- function(x) {
  # Factors are stored by their labels, not their integer codes.
  values <- if (is.factor(x)) as.character(x) else x
  # Prepend the marker class, keeping the existing classes after it.
  class(values) <- c("ena.co.occurrence", class(values))
  values
}
#' Re-class vector as ena.dimension
#'
#' @param x Vector to re-class
#'
#' @return re-classed vector
#' @export
as.ena.dimension <- function(x) {
  # Factors are stored by their labels, not their integer codes.
  values <- if (is.factor(x)) as.character(x) else x
  # Prepend the marker class, keeping the existing classes after it.
  class(values) <- c("ena.dimension", class(values))
  values
}

#####
#' @title Wrapper to generate, and optionally plot, an ENA model
#'
#' @description Convenience entry point for constructing an ENA model from a
#' coded data frame. Handles accumulation, dimensional reduction, and optional
#' plot generation in a single call, returning an \code{ena.set} object that
#' contains unit positions, network weights, node positions, and plots.
#'
#' @details
#' \code{ena()} runs three phases internally:
#'
#' \strong{1. Accumulation} — co-occurrence counts are computed for each unit
#' across stanza windows defined by \code{window}, \code{window.size.back}, and
#' \code{window.size.forward}.
#'
#' \strong{2. Dimensional reduction} — accumulated vectors are normed, centered,
#' and rotated into a low-dimensional ENA space. When \code{groupVar} and two
#' \code{groups} are supplied the rotation maximises separation between the group
#' means (means rotation); otherwise SVD is used.
#'
#' \strong{3. Plotting} — plots are built and stored on the returned set in
#' \code{set$plots}. Pass \code{include.plots = FALSE} to skip this phase
#' entirely, which is useful for programmatic use (simulations, parameter
#' sweeps) where plot objects are not needed.
#'
#' \strong{Plot defaults:} \code{network = TRUE} but \code{points = FALSE} and
#' \code{mean = FALSE}. For a two-group comparison you almost always want
#' \code{mean = TRUE} as well, to show group centroids and confidence intervals
#' alongside the network.
#'
#' \strong{Accessing results:} the returned \code{ena.set} object contains:
#' \describe{
#'   \item{\code{$points}}{unit positions in the rotated ENA space (rows = units)}
#'   \item{\code{$line.weights}}{normed co-occurrence weights per unit (rows = units, cols = code pairs)}
#'   \item{\code{$node.positions}}{positions of each code node in the ENA space}
#'   \item{\code{$plots}}{named list of \code{ENAplot} objects; two-group models
#'     produce three plots keyed by \code{group1}, \code{group2}, and
#'     \code{"group1-group2"}}
#'   \item{\code{$tests}}{list of Wilcoxon and t-test results on dimensions 1
#'     and 2, populated when \code{runTest = TRUE}}
#'   \item{\code{$variance}}{proportion of variance explained by each dimension}
#' }
#'
#' @param data data.frame containing metadata and coded columns
#' @param codes vector, numeric or character, of column names or indices containing the codes to model
#' @param units vector, numeric or character, of column names that together uniquely identify each unit of analysis
#' @param conversation vector, numeric or character, of column names used to segment the data into conversations (stanza boundaries reset at each new conversation)
#' @param metadata vector, numeric or character, of column names to carry through as unit-level metadata (default: NULL)
#' @param model character, the ENA model to construct: \code{EndPoint} (default) produces a single adjacency vector per unit summing co-occurrences across all lines; \code{AccumulatedTrajectory} produces one adjacency vector per unit per conversation, where each successive conversation accumulates prior ones; \code{SeparateTrajectory} produces one adjacency vector per unit per conversation, each modeled independently
#' @param weight.by how to weight co-occurrences: \code{"binary"} (default) counts each co-occurrence once per stanza window; supply a function (e.g. \code{sum}) to use raw counts
#' @param window stanza window type: \code{"MovingStanzaWindow"} (default) or \code{"Conversation"} (all lines in a conversation form one window)
#' @param window.size.back integer, number of lines back from each line to include in the stanza window (default: 1)
#' @param window.size.forward integer, number of lines forward from each line to include in the stanza window (default: 0). Set to model bidirectional co-occurrence within a window.
#' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
#' @param groupVar character, name of the column containing group labels. When supplied with two \code{groups}, the model uses a means rotation that maximises variance between group means.
#' @param groups vector, character, of exactly the group values from \code{groupVar} to use for means rotation, plotting, and statistical tests. If omitted, the first two unique values of \code{groupVar} are used with a warning.
#' @param runTest logical, if TRUE runs a Wilcoxon rank-sum test and a Student's t-test comparing the two groups on dimensions 1 and 2; results stored in \code{set$tests} (default: FALSE)
#' @param points logical, TRUE will plot individual unit points (default: FALSE)
#' @param mean logical, TRUE will plot group mean positions with confidence intervals — recommended whenever \code{groupVar} is supplied (default: FALSE)
#' @param network logical, TRUE will plot mean networks (default: TRUE)
#' @param networkMultiplier numeric, scaling factor applied to edge weights in non-subtracted network plots (default: 1)
#' @param subtractionMultiplier numeric, scaling factor applied to edge weights in the subtracted network plot (default: 1)
#' @param unit character, name of a single unit to plot in isolation; when supplied, all group plotting is skipped
#' @param colors vector, character, of colors for groups or points. For two-group models, supply two values (group1, group2); for single-group or no-group models, supply one value. Defaults to "blue"/"red" for two groups and "black" otherwise.
#' @param confidence.interval character, style of confidence interval shown on mean points: "box" (default), "crosshairs", or "none"
#' @param include.plots logical, if TRUE (default) generates and attaches plot objects to the returned set; set to FALSE to skip all plotting for faster programmatic use
#' @param print.plots logical, if TRUE renders plots in the Viewer as they are created (default: FALSE)
#' @param ... Additional parameters passed to set creation and plotting functions, including \code{mask} (an optional binary matrix of size ncol(codes) x ncol(codes) where 0 suppresses co-occurrence modeling between a pair of codes; see \code{\link{ena.accumulate.data}})
#'
#' @examples
#' data(RS.data)
#'
#' codes = c('Data',
#'           'Technical.Constraints',
#'           'Performance.Parameters',
#'           'Client.and.Consultant.Requests',
#'           'Design.Reasoning',
#'           'Collaboration')
#'
#' # Minimal call: fit a model with no group comparison
#' rs = ena(
#'   data = RS.data,
#'   units = c("UserName", "Condition", "GroupName"),
#'   conversation = c("Condition", "GroupName"),
#'   codes = codes,
#'   window.size.back = 4
#' )
#'
#' # Two-group comparison with means rotation, centroids, and statistical tests
#' rs = ena(
#'   data = RS.data,
#'   units = c("UserName", "Condition", "GroupName"),
#'   conversation = c("Condition", "GroupName"),
#'   codes = codes,
#'   window.size.back = 4,
#'   groupVar = "Condition",
#'   groups = c("FirstGame", "SecondGame"),
#'   mean = TRUE,
#'   runTest = TRUE,
#'   print.plots = FALSE
#' )
#'
#' # Model fitting only, no plots (faster for programmatic use)
#' rs = ena(
#'   data = RS.data,
#'   units = c("UserName", "Condition", "GroupName"),
#'   conversation = c("Condition", "GroupName"),
#'   codes = codes,
#'   window.size.back = 4,
#'   include.plots = FALSE
#' )
#'
#' @return An \code{ena.set} object. See the Details section for a description
#'   of the key fields (\code{$points}, \code{$line.weights}, \code{$plots},
#'   \code{$tests}, etc.).
#' @export
#####
ena <- function(
  data,
  codes,
  units,
  conversation,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  include.meta = TRUE,
  groupVar = NULL,
  groups = NULL,
  runTest = FALSE,
  points = FALSE,
  mean = FALSE,
  network = TRUE,
  networkMultiplier = 1,
  subtractionMultiplier = 1,
  unit = NULL,
  colors = NULL,
  confidence.interval = "box",
  # Spelled out as TRUE/FALSE: `T` and `F` are ordinary variables that can be
  # reassigned, which would silently change these defaults.
  include.plots = TRUE,
  print.plots = FALSE,
  ...
) {
  # Build the model. `model` and `window` are forwarded as received (the full
  # choice vectors when the caller did not pick one).
  # NOTE(review): presumably ena.set.creator() resolves those choice vectors
  # itself -- confirm.
  set <- ena.set.creator(
    data = data,
    codes = codes,
    units = units,
    conversation = conversation,
    metadata = metadata,
    model = model,
    weight.by = weight.by,
    window = window,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    include.meta = include.meta,
    groupVar = groupVar,
    groups = groups,
    runTest = runTest,
    ...
  )

  # Plotting is optional; the same `...` is forwarded to both the set
  # creator above and the plotter here.
  if (include.plots) {
    set <- ena.plotter(
      set = set,
      groupVar = groupVar,
      groups = groups,
      points = points,
      mean = mean,
      network = network,
      networkMultiplier = networkMultiplier,
      subtractionMultiplier = subtractionMultiplier,
      unit = unit,
      colors = colors,
      confidence.interval = confidence.interval,
      print.plots = print.plots,
      ...
    )
  }

  return(set)
}

##
# @title Accumulate Data from csv
#
# @description This function accumulates rows of data.
#
# @details [TBD]
#
#@export
#
# @param file The csv file location or data.frame for the function
# @param units.used Delimits columns based on the units (which specific units to use)
# @param units.by unit columns to accumulate by
# @param conversations.by Columns used in the conversation
# @param codes Columns used based on codes
# @param window.size.back Number of lines back to include window in stanza
# @param window.size.forward Number of lines forward in stanza window
# @param binary [TBD]
# @param model [TBD]
# @param window [TBD]
# @param weight.by [TBD]
# @param binary.stanzas [TBD]
# @param mask [TBD]
# @param ... additional parameters addressed in inner function
#
#
# @seealso \code{\link{ena.make.set}}
#
# @examples
# \dontrun{
# codeNames = c(
#   "E.data","S.data","E.design","S.design","S.professional","E.client",
#   "V.client","E.consultant","V.consultant","S.collaboration","I.engineer",
#   "I.intern","K.actuator","K.rom","K.materials","K.power"
# )
#
# df.file <- system.file("extdata", "rs.data.csv", package="rENA")
#
# # Given a csv file location
# ena.accumulate.data(
#   df.file, units.by = c("UserName","Condition"),
#   conversations.by = c("ActivityNumber","GroupName"),
#   codes = codeNames
# )
# }
# @return \code{\link{ENAdata}} class object with accumulated data
#
##
ena.accumulate.data.file <- function(
  file,
  units.used = NULL,
  conversations.used = NULL,
  units.by,
  conversations.by,
  codes = NULL,
  model = c("EndPoint",
            "AccumulatedTrajectory",
            "SeparateTrajectory"),
  window = c("Moving Stanza", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  weight.by = "binary",
  # Spelled out as TRUE/FALSE: `T` and `F` are ordinary variables that can be
  # reassigned, which would silently change these defaults.
  binary.stanzas = FALSE,
  mask = NULL,
  include.meta = TRUE,
  as.list = TRUE,
  ...
) {
  # Accumulates data from a csv file location (or data.frame) by building an
  # ENAdata object, processing it, and by default converting it with ena.set().
  #
  # NOTE(review): `conversations.used` and `binary.stanzas` are accepted but
  # never read or forwarded in this function -- confirm whether ENAdata$new()
  # should receive them.

  # All four inputs are required.
  if(is.null(file) ||
     is.null(units.by) ||
     is.null(conversations.by) || is.null(codes)
  ) {
    stop("Accumulation: file, units.by, conversations.by, and codes")
  }

  units <- NULL;
  model <- match.arg(model);
  window <- match.arg(window);

  # With the "Conversation" window the unit columns are appended to the
  # conversation columns, and the window size is replaced by the window name.
  # NOTE(review): window.size.back becomes the string "Conversation" here;
  # presumably ENAdata interprets that as a whole-conversation window --
  # confirm.
  if (identical(window, "Conversation")) {
    conversations.by = c(conversations.by, units.by);
    window.size.back = window;
  }
  data = ENAdata$new(
    file = file,
    units = units,
    units.used = units.used,
    units.by = units.by,
    conversations.by = conversations.by,
    codes = codes,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    mask = mask,
    include.meta = include.meta,
    ...
  );
  data$process();

  # Record the call that produced this object.
  data$function.call = sys.call();

  # Default: return the newer list-based set. The R6 object is still
  # available with as.list = FALSE, at the cost of a deprecation warning.
  if(as.list) {
    data = ena.set(data);
  } else {
    warning("Usage of R6 data objects is deprecated and may be removed entirely in a future version. Consider upgrading to the new data object.")
  }
  data
}

# S3 class names used by the qe.* classification helpers in this file:
# the as.qe.* constructors prepend the matching entry to an object's class
# vector, and is.qe.data() tests for the `data` entry.
CLASS_NAMES <- list(
  data = "qe.data",
  meta = "qe.metadata",
  code = "qe.code",
  unit = "qe.unit",
  horizon = "qe.horizon"
)

# Warning message texts, looked up by key. `data_from_vector` is the one
# raised by as.qe.data() when it is handed a plain vector.
WARNINGS <- list(
  data_from_vector = "Cannot transform vectors to `qe.data`",
  null_metadata = "`metadata` must be supplied as a vector of column names. No metadata classified.",
  null_codes = "`codes` must be supplied as a vector of column names. No codes classified.",
  null_units = "`units` must be supplied as a vector of column names. No units classified.",
  null_horizon = "`horizon` must be supplied as a vector of column names. No horizon classified."
)

#' Convert an object to 'qe.data' class
#'
#' This function converts an object to the 'qe.data' class. If the object is not a data.frame or matrix, it is first converted to a data.table.
#'
#' @param x An object. The object to be converted to 'qe.data' class.
#'
#' @return The modified object with the 'qe.data' class.
#' @examples
#' library(data.table)
#'
#' dt <- data.table(
#'   ID = 1:5,
#'   Name = c("Alice", "Bob", "Charlie", "David", "Eve"),
#'   Age = c(25, 30, 35, 40, 45),
#'   Score = c(85, 90, 95, 80, 75)
#' )
#' dt <- as.qe.data(dt);
#' class(dt) # Should show 'qe.data' along with other classes
#'
#' @export
as.qe.data <- function(x) {
  # Already classified: hand the object back untouched.
  if (is.qe.data(x)) {
    return(x)
  }

  # Plain vectors cannot become tabular data; warn and return them as-is.
  if (is.vector(x)) {
    warning(WARNINGS$data_from_vector)
    return(x)
  }

  # Matrices and non-data.table data frames are converted to data.table
  # before the class is attached.
  needs_conversion <- is.matrix(x) ||
    (is.data.frame(x) && !data.table::is.data.table(x))
  if (needs_conversion) {
    x <- data.table::as.data.table(x)
  }

  class(x) <- c(CLASS_NAMES$data, class(x))
  return(x)
}

#' Convert a vector to 'qe.code' class
#'
#' This function converts a vector to the 'qe.code' class. If the vector is a factor, it is first converted to a character vector.
#'
#' @param x A vector. The vector to be converted to 'qe.code' class.
#'
#' @return The modified vector with the 'qe.code' class.
#' @examples
#' vec <- factor(c("A", "B", "C"))
#' vec <- as.qe.code(vec)
#' class(vec) # Should show 'qe.code' along with other classes
#' @export
as.qe.code <- function(x) {
  if (!is.qe.code(x)) {
    # Factors are stored by their labels, not their integer codes.
    values <- if (is.factor(x)) as.character(x) else x
    class(values) <- c(CLASS_NAMES$code, class(values))
    return(values)
  }

  # Already classified: nothing to do.
  return(x)
}

#' Convert a vector to 'qe.metadata' class
#'
#' This function converts a vector to the 'qe.metadata' class. If the vector is a factor, it is first converted to a character vector.
#'
#' @param x A vector. The vector to be converted to 'qe.metadata' class.
#'
#' @return The modified vector with the 'qe.metadata' class.
#' @examples
#' vec <- factor(c("A", "B", "C"))
#' vec <- as.qe.metadata(vec)
#' class(vec) # Should show 'qe.metadata' along with other classes
#' @export
as.qe.metadata <- function(x) {
  # Only untagged input needs work; tagged input falls straight through.
  if (!is.qe.metadata(x)) {
    # Factors are flattened to their character labels before tagging.
    if (is.factor(x)) {
      x <- as.character(x)
    }

    # Prepend the metadata class name to whatever classes x already has.
    class(x) <- c(CLASS_NAMES$meta, class(x))
  }

  x
}

#' Convert a vector to 'qe.unit' class
#'
#' This function converts a vector to the 'qe.unit' class. If the vector is a factor, it is first converted to a character vector.
#'
#' @param x A vector. The vector to be converted to 'qe.unit' class.
#'
#' @return The modified vector with the 'qe.unit' class.
#' @examples
#' vec <- factor(c("A", "B", "C"))
#' vec <- as.qe.unit(vec)
#' class(vec) # Should show 'qe.unit' along with other classes
#' @export
as.qe.unit <- function(x) {
  # Only untagged input needs work; tagged input falls straight through.
  if (!is.qe.unit(x)) {
    # Factors are flattened to their character labels before tagging.
    if (is.factor(x)) {
      x <- as.character(x)
    }

    # Prepend the unit class name to whatever classes x already has.
    class(x) <- c(CLASS_NAMES$unit, class(x))
  }

  x
}

#' Convert a vector to 'qe.horizon' class
#'
#' This function converts a vector to the 'qe.horizon' class. If the vector is a factor, it is first converted to a character vector.
#'
#' @param x A vector. The vector to be converted to 'qe.horizon' class.
#'
#' @return The modified vector with the 'qe.horizon' class.
#' @examples
#' vec <- factor(c("A", "B", "C"))
#' vec <- as.qe.horizon(vec)
#' class(vec) # Should show 'qe.horizon' along with other classes
#' @export
as.qe.horizon <- function(x) {
  # Only untagged input needs work; tagged input falls straight through.
  if (!is.qe.horizon(x)) {
    # Factors are flattened to their character labels before tagging.
    if (is.factor(x)) {
      x <- as.character(x)
    }

    # Prepend the horizon class name to whatever classes x already has.
    class(x) <- c(CLASS_NAMES$horizon, class(x))
  }

  x
}

#' Check if an object is of class 'qe.data'
#'
#' This function checks if an object is of class 'qe.data'.
#'
#' @param x An object. The object to be checked.
#'
#' @return A logical value. TRUE if the object is of class 'qe.data', otherwise FALSE.
#' @examples
#' library(data.table)
#'
#' dt <- data.table(ID = 1:5)
#' class(dt) <- c("qe.data", class(dt))
#' is.qe.data(dt) # Should return TRUE
#' @export
is.qe.data <- function(x) {
  # Membership test of the package's data class name in x's class vector.
  object_classes <- class(x)
  CLASS_NAMES$data %in% object_classes
}

#' Check if an object is of class 'qe.code'
#'
#' This function checks if an object is of class 'qe.code'.
#'
#' @param x An object. The object to be checked.
#'
#' @return A logical value. TRUE if the object is of class 'qe.code', otherwise FALSE.
#' @examples
#' dt <- 1:5
#' class(dt) <- c("qe.code", class(dt))
#' is.qe.code(dt) # Should return TRUE
#' @export
is.qe.code <- function(x) {
  # Membership test of the package's code class name in x's class vector.
  object_classes <- class(x)
  CLASS_NAMES$code %in% object_classes
}

#' Check if an object is of class 'qe.metadata'
#'
#' This function checks if an object is of class 'qe.metadata'.
#'
#' @param x An object. The object to be checked.
#'
#' @return A logical value. TRUE if the object is of class 'qe.metadata', otherwise FALSE.
#' @examples
#' dt <- 1:5
#' class(dt) <- c("qe.metadata", class(dt))
#' is.qe.metadata(dt) # Should return TRUE
#' @export
is.qe.metadata <- function(x) {
  # Membership test of the package's metadata class name in x's class vector.
  object_classes <- class(x)
  CLASS_NAMES$meta %in% object_classes
}


#' Check if an object is of class 'qe.unit'
#'
#' This function checks if an object is of class 'qe.unit'.
#'
#' @param x An object. The object to be checked.
#'
#' @return A logical value. TRUE if the object is of class 'qe.unit', otherwise FALSE.
#' @examples
#' dt <- 1:5
#' class(dt) <- c("qe.unit", class(dt))
#' is.qe.unit(dt) # Should return TRUE
#' @export
is.qe.unit <- function(x) {
  # Membership test of the package's unit class name in x's class vector.
  object_classes <- class(x)
  CLASS_NAMES$unit %in% object_classes
}

#' Check if an object is of class 'qe.horizon'
#'
#' This function checks if an object is of class 'qe.horizon'.
#'
#' @param x An object. The object to be checked.
#'
#' @return A logical value. TRUE if the object is of class 'qe.horizon', otherwise FALSE.
#' @examples
#' dt <- 1:5
#' class(dt) <- c("qe.horizon", class(dt))
#' is.qe.horizon(dt) # Should return TRUE
#' @export
is.qe.horizon <- function(x) {
  # Membership test of the package's horizon class name in x's class vector.
  object_classes <- class(x)
  CLASS_NAMES$horizon %in% object_classes
}

# Ellipsoidal scaling version
lws.positions.sq <- function(enaset) {
  # Work on plain matrices of the set's points and line weights.
  point_matrix <- as.matrix(enaset$points)
  weight_matrix <- as.matrix(enaset$line.weights)

  # Fit positions over every dimension present in the points matrix.
  fit <- lws_lsq_positions(weight_matrix, point_matrix, ncol(point_matrix))

  # Label node rows with the rotation's codes and columns with the point
  # dimension names.
  nodes <- fit$nodes
  colnames(nodes) <- colnames(point_matrix)
  rownames(nodes) <- enaset$rotation$codes

  list("node.positions" = nodes, "centroids" = fit$centroids)
}

lws.positions.sq.R6 <- function(enaset) {
  # Number of dimensions handed to the fit in both branches.
  n_dims <- ncol(enaset$points.rotated)

  if (enaset$function.params$center.align.to.origin) {
    # Fit only on rows whose line weights are not all zero.
    has_weight <- rowSums(as.matrix(enaset$line.weights)) != 0
    fit <- lws_lsq_positions(
      enaset$line.weights[has_weight, ],
      enaset$points.rotated[has_weight, ],
      n_dims
    )

    # Column means of the fitted centroids; subtracted below to re-centre.
    centroid_shift <- colMeans(fit$centroids)

    # Start from the rotated points and overwrite the rows that have a
    # non-zero row sum with the shifted centroids.
    # NOTE(review): this mask is derived from points.rotated while the fit used
    # a mask derived from line.weights - confirm both always select the same rows.
    full_centroids <- enaset$points.rotated
    has_point <- rowSums(as.matrix(full_centroids)) != 0
    full_centroids[has_point, ] <- t(t(fit$centroids) - centroid_shift)

    fit$centroids <- full_centroids
    fit$nodes <- t(t(fit$nodes) - centroid_shift)
  } else {
    fit <- lws_lsq_positions(enaset$line.weights, enaset$points.rotated, n_dims)
  }

  # Label node rows with the accumulated data's codes.
  nodes <- fit$nodes
  rownames(nodes) <- enaset$enadata$codes

  list("node.positions" = nodes, "centroids" = fit$centroids)
}

### plot subtraction ###

# Build three plots for a pair of groups - one per group plus their
# subtraction - and store them on `set$plots`.
#
# Arguments:
#   set                   ENA set; `set$points`, `set$line.weights` and
#                         `set$plots` are read/written.
#   groupVar              Name of the column in `set$points` holding group labels.
#   group1, group2        Group labels to compare; also used as plot titles and
#                         as keys in `set$plots`.
#   points, mean, network Logical switches selecting what is drawn; at least
#                         one must be TRUE.
#   networkMultiplier     Scale applied to each group's mean line weights.
#   subtractionMultiplier Scale applied to the difference of the two means.
#   group1.color, group2.color Colors used for the respective group.
#   confidence.interval   Passed through to ena.plot.group().
#   ...                   Currently unused.
#
# Returns `set` with entries `group1`, `group2` and "<group1>-<group2>" added
# to `set$plots`.
ena.plot.subtraction <- function(
  set,
  groupVar = NULL,
  group1 = NULL,
  group2 = NULL,
  points = FALSE,
  mean = FALSE,
  network = TRUE,
  networkMultiplier = 1,
  subtractionMultiplier = 1,
  group1.color = "blue",
  group2.color = "red",
  confidence.interval = "box",
  ...
) {
  # Validate up front so no plot objects are built for a request that cannot
  # succeed.
  if (!(TRUE %in% c(network, points, mean))) {
    stop("You must set at least one of points, mean, or network to TRUE to obtain a plot.")
  }
  if (is.null(groupVar) || is.null(group1) || is.null(group2)) {
    stop("groupVar, group1, and group2 must all be provided.")
  }

  # %in% (rather than ==) keeps NA group labels from turning into NA rows when
  # the masks are used for subsetting below.
  group_values <- set$points[[groupVar]]
  group1.rows <- group_values %in% group1
  group2.rows <- group_values %in% group2

  g1.plot <- ena.plot(enaset = set, title = group1)
  g2.plot <- ena.plot(enaset = set, title = group2)
  sub.plot <- ena.plot(
    enaset = set,
    title = paste0("Network Subtraction -- ", group1, " vs ", group2)
  )

  if (network == TRUE) {
    line_weights <- as.matrix(set$line.weights)

    # Mean network per group; drop = FALSE keeps single-row groups as matrices.
    g1.mean.lw <- colMeans(line_weights[group1.rows, , drop = FALSE]) * networkMultiplier
    g2.mean.lw <- colMeans(line_weights[group2.rows, , drop = FALSE]) * networkMultiplier
    sub <- (g1.mean.lw - g2.mean.lw) * subtractionMultiplier

    g1.plot <- ena.plot.network(g1.plot, network = g1.mean.lw, colors = group1.color)
    g2.plot <- ena.plot.network(g2.plot, network = g2.mean.lw, colors = group2.color)
    sub.plot <- ena.plot.network(sub.plot, network = sub)
  }

  # The per-group point matrices are shared by the points and mean layers.
  if (points == TRUE || mean == TRUE) {
    point_matrix <- as.matrix(set$points)
    g1.points.for.plot <- point_matrix[group1.rows, , drop = FALSE]
    g2.points.for.plot <- point_matrix[group2.rows, , drop = FALSE]
  }

  if (points == TRUE) {
    g1.plot <- ena.plot.points(enaplot = g1.plot, points = g1.points.for.plot, colors = group1.color)
    g2.plot <- ena.plot.points(enaplot = g2.plot, points = g2.points.for.plot, colors = group2.color)
    sub.plot <- ena.plot.points(enaplot = sub.plot, points = g1.points.for.plot, colors = group1.color)
    sub.plot <- ena.plot.points(enaplot = sub.plot, points = g2.points.for.plot, colors = group2.color)
  }

  if (mean == TRUE) {
    g1.plot <- ena.plot.group(g1.plot, g1.points.for.plot, colors = group1.color, labels = group1, confidence.interval = confidence.interval)
    g2.plot <- ena.plot.group(g2.plot, g2.points.for.plot, colors = group2.color, labels = group2, confidence.interval = confidence.interval)
    sub.plot <- ena.plot.group(sub.plot, g1.points.for.plot, colors = group1.color, labels = group1, confidence.interval = confidence.interval)
    sub.plot <- ena.plot.group(sub.plot, g2.points.for.plot, colors = group2.color, labels = group2, confidence.interval = confidence.interval)
  }

  set$plots[[group1]] <- g1.plot
  set$plots[[group2]] <- g2.plot
  set$plots[[paste0(group1, "-", group2)]] <- sub.plot

  return(set)
}

###
#' @title ENA Rotate by mean
#'
#' @description Computes a dimensional reduction from a matrix of points such
#'   that the first dimension of the projected space passes through the means of
#'   two groups in the original space. Subsequent dimensions are computed using
#'   SVD on the deflated data. Delegates to \code{\link[libqe]{means_rotation}}.
#'
#' @param enaset An \code{\link{ENAset}} or compatible list with
#'   \code{model$points.for.projection}, \code{connection.counts$ENA_UNIT},
#'   \code{line.weights}, and \code{rotation$codes}.
#' @param groups A list containing one or more pairs; each pair is a length-2
#'   list \code{list(a, b)} where \code{a} and \code{b} are either logical
#'   vectors (length = number of units) or character vectors of unit IDs.
#' @param params Alias for \code{groups}; used when called from the pipe API.
#'
#' @importFrom libqe means_rotation
#' @export
#' @return A list with \code{rotation}, \code{codes}, \code{eigenvalues}, and
#'   \code{node.positions = NULL}, suitable for use inside \code{rotate()}.
###
ena.rotate.by.mean <- function(enaset, groups = NULL, params = groups) {
  # `params` is the pipe-API alias; it only matters when `groups` is absent.
  if (is.null(groups) && !is.null(params)) {
    groups <- params
  }
  if (length(groups) < 1) {
    stop("Unable to rotate without 2 groups.")
  }

  # A single pair may be passed bare; wrap it so `groups` is always a list of
  # pairs.
  if (!is(groups[[1]], "list")) groups <- list(groups)

  # Every pair needs two members; fail with the intended message rather than
  # an opaque "subscript out of bounds" further down.
  pair_sizes <- vapply(groups, length, integer(1))
  if (any(pair_sizes < 2L)) {
    stop("Unable to rotate without 2 groups.")
  }

  # Extract the data matrix (as.matrix strips metadata columns for ena.matrix)
  data <- if (!is.null(enaset$points.normed.centered)) {
    as.matrix(enaset$points.normed.centered)
  } else {
    as.matrix(enaset$model$points.for.projection)
  }

  # Convert groups (logical or character) to 0-based integer index pairs
  # required by libqe::means_rotation
  ena_unit <- enaset$connection.counts$ENA_UNIT
  as_zero_based <- function(member) {
    # Non-logical members are treated as unit IDs and matched against ENA_UNIT.
    if (!is.logical(member)) member <- ena_unit %in% member
    as.integer(which(member) - 1L)
  }
  group_pairs <- lapply(groups, function(pair) {
    list(as_zero_based(pair[[1]]), as_zero_based(pair[[2]]))
  })

  result <- libqe::means_rotation(data, group_pairs)

  # Name the rotation: columns from the result, rows from the line-weight
  # columns.
  rotation <- result$rotation
  colnames(rotation) <- result$column_names
  rownames(rotation) <- colnames(as.matrix(enaset$line.weights))

  list(
    node.positions = NULL,
    rotation       = rotation,
    codes          = enaset$rotation$codes,
    eigenvalues    = result$eigenvalues
  )
}

##
#' @title Plot of ENA set groups
#'
#' @description Plot a point based on a summary statistic computed from a given method (typically, mean) for a set of points in a projected ENA space
#'
#' @details Plots a point based on a summary statistic for a group (typically, mean)
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param points A matrix or data.frame where columns contain coordinates of points in a projected ENA space
#' @param method A function for computing a summary statistic for each column of points
#' @param labels A character which will be the label for the group's point
#' @param colors A character, determines color of the group's point, default: enaplot$color
#' @param shape A character, determines shape of the group's point, choices:  square, triangle-up, diamond, circle, default: square
#' @param confidence.interval A character that determines how the confidence interval is displayed, choices: none, crosshairs, box, default: none
#' @param outlier.interval A character that determines how outlier interval is displayed, choices: none, crosshairs, box, default: none
#' @param label.offset character: top left, top center, top right, middle left, middle center, middle right, bottom left, bottom center, bottom right (default)
#' @param label.font.size An integer which determines the font size for label, default: enaplot$font.size
#' @param label.font.color A character which determines the color of label, default: enaplot$font.color
#' @param label.font.family A character which determines font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
#' @param show.legend Logical indicating whether to show the point labels in the legend
#' @param legend.name Character indicating the name to show above the plot legend
#' @param ... Additional parameters
#'
#' @seealso \code{\link{ena.plot}}, \code{ena.plot.points}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'       accum$meta.data$Condition=="FirstGame",
#'       accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' plot = ena.plot(set)
#'
#' unitNames = set$enadata$units
#'
#' ### Plot Condition 1 Group Mean
#' plot = ena.plot.group(plot, as.matrix(set$points$Condition$FirstGame), labels = "FirstGame",
#'     colors = "red", confidence.interval = "box")
#'
#' ### plot Condition 2 Group Mean
#' plot = ena.plot.group(plot, as.matrix(set$points$Condition$SecondGame), labels = "SecondGame",
#'     colors  = "blue", confidence.interval = "box")
#'
#' print(plot);
#'
#' @return The  \code{\link{ENAplot}} provided to the function, with its plot updated to include the new group point.
##
ena.plot.group <- function(
  enaplot,
  points = NULL,
  method = "mean",
  labels = NULL,
  colors = default.colors[1],
  shape = c("square", "triangle-up", "diamond", "circle"),
  confidence.interval = c("none", "crosshairs", "box"),
  outlier.interval = c("none", "crosshairs", "box"),
  label.offset = "bottom right",
  label.font.size = NULL,
  label.font.color = NULL,
  label.font.family = NULL,
  show.legend = TRUE,
  legend.name = NULL,
  ...
) {
  shape <- match.arg(shape)
  confidence.interval <- match.arg(confidence.interval)
  outlier.interval <- match.arg(outlier.interval)

  if (is.null(points)) {
    stop("Points must be provided.")
  } else if (is(points, "ena.points")) {
    points <- remove_meta_data(points)
  }

  ### problem if outlier and confidence intervals selected for crosshair
  if (confidence.interval == "crosshairs" && outlier.interval == "crosshairs") {
    message("Confidence Interval and Outlier Interval cannot both be crosshair. Plotting Outlier Interval as box")
    outlier.interval <- "box"
  }

  ### if group more than one row, combine to a single summary point
  confidence.interval.values <- NULL
  outlier.interval.values <- NULL
  if (
    (is(points, "data.frame") || is(points, "matrix")) &&
    nrow(points) > 1
  ) {
    # `method` may be NULL, the string "mean", or any function / function name.
    # Only compare it with "mean" when it is a string: `==` on a function
    # raises an error.
    use_mean <- is.null(method) ||
      (is.character(method) && length(method) == 1L && method == "mean")

    if (use_mean) {
      if (confidence.interval != "none") {
        # One column per axis: 95% t-interval bounds for columns 1 and 2.
        confidence.interval.values <- matrix(
          c(
            as.vector(t.test(points[, 1], conf.level = 0.95)$conf.int),
            as.vector(t.test(points[, 2], conf.level = 0.95)$conf.int)
          ),
          ncol = 2
        )
      }
      if (outlier.interval != "none") {
        # One column per axis: -/+ 1.5 * IQR for columns 1 and 2.
        outlier.interval.values <- c(IQR(points[, 1]), IQR(points[, 2])) * 1.5
        outlier.interval.values <- matrix(rep(outlier.interval.values, 2), ncol = 2, byrow = TRUE) * c(-1, 1)
      }

      if (length(unique(colors)) > 1) {
        # One mean per distinct color. drop = FALSE keeps a color with a single
        # row as a matrix so colMeans() still works.
        group_colors <- unique(colors)
        point_matrix <- as.matrix(points)
        points <- t(sapply(
          group_colors,
          function(color) colMeans(point_matrix[color == colors, , drop = FALSE]),
          simplify = TRUE
        ))
        colors <- group_colors
        # NOTE(review): length() of the stored attribute is used, not its value;
        # if it holds a single number it will not keep counting - confirm intent.
        attr(enaplot, "means") <- length(attr(enaplot, "means")) + length(colors)
      } else {
        points <- colMeans(points)
        attr(enaplot, "means") <- length(attr(enaplot, "means")) + 1
      }
    } else {
      if (confidence.interval != "none") warning("Confidence Intervals can only be used when method=`mean`")
      if (outlier.interval != "none") warning("Outlier Intervals can only be used when method=`mean`")

      # Apply the user-supplied summary (function or function name) per column.
      points <- apply(points, 2, function(x) do.call(method, list(x)))
      attr(enaplot, "means") <- length(attr(enaplot, "means")) + 1
    }
  }

  # Drawing is delegated to ena.plot.points() with the summarised point(s).
  enaplot <- ena.plot.points(
    enaplot,
    points = points,
    labels = labels,
    colors = colors,
    shape = shape,
    confidence.interval = confidence.interval,
    confidence.interval.values = confidence.interval.values,
    outlier.interval = outlier.interval,
    outlier.interval.values = outlier.interval.values,
    label.offset = label.offset,
    label.font.size = label.font.size,
    label.font.color = label.font.color,
    label.font.family = label.font.family,
    show.legend = show.legend,
    legend.name = legend.name,
    ...
  )
  return(enaplot)
}

#' Tune Window Size for ENA Accumulation
#'
#' This function iterates through a range of window sizes to find the optimal size
#' for a discourse accumulation. It identifies the "stability plateau" by calculating
#' the correlation between adjacent window sizes and selecting the smallest size
#' that meets a specified threshold of the maximum observed stability.
#'
#' @param accum_object An \code{ENAAccumulation} object or a call that can be
#' re-evaluated to create one.
#' @param min_size Integer. The minimum window size (default=1) to test.
#' @param max_size Integer. The maximum window size (default=20) to test.
#' @param cutoff Numeric. The threshold (default 0.95) of the maximum correlation
#' used to determine the "best" window size.
#'
#' @details
#' The function uses the internal \code{_function.call} from the \code{accum_object}
#' to iteratively rebuild the accumulation. For each window size, it generates an
#' ENA set and extracts the unit points, then computes the correlation between the ENA points of
#' adjacent window sizes.
#' The best window size is the smallest one whose correlation is at least cutoff*max_correlation.
#'
#' @return A new \code{ENAAccumulation} object generated with the
#' \code{best_window_size}.
#'
#' @export
#' @importFrom rENA ena.make.set ena_space_dist_corr
#'
#' @examples
#' \dontrun{
#' # Assuming 'accum' is your existing accumulation object
#' tuned_accum <- ena.tune.window.size(accum, min_size = 1, max_size = 20, cutoff = 0.95)
#' }
ena.tune.window.size <- function(accum_object, min_size = 1, max_size = 20, cutoff = 0.95) {
  # 1. Recover the call stored on the accumulation so it can be replayed with
  #    different `window.size.back` values.
  orig_call <- accum_object$`_function.call`
  if (is.null(orig_call)) {
    stop("accum_object has no `_function.call` to re-evaluate.")
  }
  call_list <- as.list(orig_call)

  # The stored call is re-evaluated in the frame that called this function.
  caller_env <- parent.frame()
  accumulate_with <- function(window_size) {
    call_list[["window.size.back"]] <- window_size
    eval(as.call(call_list), envir = caller_env)
  }

  window_range <- min_size:max_size
  n_steps <- length(window_range) - 1L

  # A single candidate leaves nothing to compare; use it directly instead of
  # iterating over 1:0.
  if (n_steps < 1L) {
    return(accumulate_with(window_range[1]))
  }

  # 2. Rebuild the accumulation and ENA set per window size, keeping the points.
  #    Note: for large ranges this can be memory intensive.
  all_points <- vector("list", length(window_range))
  for (i in seq_along(window_range)) {
    curr_set <- rENA::ena.make.set(accumulate_with(window_range[i]))
    all_points[[i]] <- as.matrix(curr_set$points)
  }

  # 3. Correlation between the points of each pair of adjacent window sizes.
  adj_correlations <- numeric(n_steps)
  for (i in seq_len(n_steps)) {
    adj_correlations[i] <- rENA::ena_space_dist_corr(all_points[[i]], all_points[[i + 1L]])
  }
  if (all(is.na(adj_correlations))) {
    stop("Unable to compute correlations between adjacent window sizes.")
  }

  # 4. Smallest window size whose correlation reaches cutoff * max correlation.
  max_corr <- max(adj_correlations, na.rm = TRUE)
  threshold <- cutoff * max_corr
  best_idx <- which(adj_correlations >= threshold)[1]
  if (is.na(best_idx)) {
    # Nothing reaches the threshold (possible when max_corr is negative):
    # fall back to the step with the highest correlation.
    best_idx <- which.max(adj_correlations)
  }
  best_window_size <- window_range[best_idx]

  # 5. Return a fresh accumulation built with the chosen window size.
  accumulate_with(best_window_size)
}

##
#' @title Names to Adjacency Key
#'
#' @description Convert a vector of strings, representing names of a square matrix, to an adjacency key
#'
#' @details Returns a matrix of 2 rows by choose(length(vector), 2) columns
#'
#' @param vector Vector representing the names of a square matrix
#' @param upper_triangle Not Implemented
#'
#' @export
##
namesToAdjacencyKey <- function(vector, upper_triangle = TRUE) {
  # triIndices() output is shifted by one before being used to index `vector`;
  # the selected names are then laid out two per column.
  # `upper_triangle` is accepted but not used.
  name_positions <- triIndices(length(vector)) + 1
  paired_names <- vector[name_positions]
  matrix(paired_names, nrow = 2)
}

##
#' @title Generate a plot of an ENAset
#'
#' @description Generates a plot from a given ENA set object
#'
#' @details This function defines the axes and other features of a plot for displaying an ENAset; generates an ENAplot object that can be used to plot points, network graphs, and other information from an ENAset
#'
#' @export
#'
#' @param enaset The \code{\link{ENAset}} that will be used to generate a plot
#' @param title A character used for the title of the plot, default: ENA Plot
#' @param dimension.labels A character vector containing labels for the axes, default: c(X, Y)
#' @param font.size An integer determining the font size for graph labels, default: 10
#' @param font.color A character determining the color of label font, default: black
#' @param font.family A character determining the font type, choices: Arial, Courier New, Times New Roman, default: Arial
#' @param scale.to "network" (default), "points", or a list with x and y ranges. Network and points both scale to the c(-max, max) of the corresponding data.frame
#' @param ... additional parameters addressed in inner function
#'
#'
#' @seealso \code{\link{ena.make.set}}, \code{\link{ena.plot.points}}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum
#' )
#'
#' plot = ena.plot(set)
#'
#' group1.points = set$points.rotated[set$enadata$units$Condition == "FirstGame",]
#' plot = ena.plot.points(plot, points = group1.points);
#' print(plot);
#'
#' @return \code{\link{ENAplot}} used for plotting an ENAset

##
ena.plot <- function(
  enaset,
  title = "ENA Plot",
  dimension.labels = c("",""),
  font.size = 10,
  font.color = "#000000",
  font.family = c("Arial", "Courier New", "Times New Roman"),
  scale.to = "network",
  ...
) {
  # Legacy ENAset objects are still accepted, but are converted with a warning.
  legacy_input <- is(enaset, "ENAset")
  if (legacy_input) {
    deprecation_note <- paste0(
      "Usage of ENAset objects will be deprecated ",
      "and potentially removed altogether in future versions."
    )
    warning(deprecation_note)

    enaset <- ena.set(enaset)
  }

  # Resolve the font family against the choices listed in the signature.
  font.family <- match.arg(font.family)

  # Everything else is handled by the ENAplot constructor.
  ENAplot$new(
    enaset,
    title,
    dimension.labels,
    font.size,
    font.color,
    font.family,
    scale.to = scale.to,
    ...
  )
}

#' @title ENA SVD
#' @description Computes a dimensional reduction of points in an ENA set using Singular Value Decomposition (SVD).
#' @param enaset An \code{ENAset} object containing the points to be reduced.
#' @param params A list of parameters. Use \code{params$as_object = TRUE} to return an ENARotationSet object, or \code{FALSE} (default) to return a list.
#' @details This function computes the SVD of the points in the ENA set and returns either an ENARotationSet object or a list with the rotation matrix, codes, node positions, and eigenvalues, depending on \code{params$as_object}.
#' @return An ENARotationSet object or a list containing:
#'   \item{rotation}{The rotation matrix from SVD}
#'   \item{codes}{The code names used for the matrix}
#'   \item{node.positions}{(Currently NULL) Node positions}
#'   \item{eigenvalues}{The eigenvalues (squared singular values) from SVD}
#' @examples
#' data(RS.data)
#' codes <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'            "Client.and.Consultant.Requests", "Design.Reasoning",
#'            "Collaboration")
#' units <- c("Condition", "UserName")
#' horizon <- c("Condition", "GroupName")
#' enaset <- RS.data |>
#'   accumulate(units, codes, horizon) |>
#'   model()
#' # SVD as list:
#' svd_result <- ena.svd(enaset, list(as_object = FALSE))
#' # SVD as ENARotationSet object:
#' svd_obj <- ena.svd(enaset, list(as_object = TRUE))
#' @export
ena.svd <- function(enaset, params = list()) {
  # `params` defaults to an empty list so ena.svd(enaset) works; the result is
  # a plain list unless params$as_object is exactly TRUE.
  as_object <- FALSE
  if (!is.null(params$as_object)) {
    as_object <- params$as_object
  }

  # Uncentred, unscaled principal components of the projection points, i.e. a
  # plain SVD. tol = 0 and retx = FALSE are passed straight to prcomp().
  pts <- as.matrix(enaset$model$points.for.projection)
  pcaResults <- prcomp(pts, retx = FALSE, scale. = FALSE, center = FALSE, tol = 0)

  # Name the components SVD1, SVD2, ...
  colnames(pcaResults$rotation) <- paste0("SVD", seq_len(ncol(pcaResults$rotation)))

  if (isTRUE(as_object)) {
    rotationSet <- ENARotationSet$new(
      rotation = pcaResults$rotation,
      codes = enaset$rotation$codes,
      node.positions = NULL,
      eigenvalues = pcaResults$sdev^2
    )
  } else {
    rotationSet <- list(
      rotation = pcaResults$rotation,
      codes = enaset$rotation$codes,
      node.positions = NULL,
      eigenvalues = pcaResults$sdev^2
    )
  }
  return(rotationSet)
}

ena.svd.R6 <- function(enaset, ...) {
  # Uncentred, unscaled principal components of the normed, centred points.
  decomposition <- prcomp(
    enaset$points.normed.centered,
    retx = FALSE,
    center = FALSE,
    scale. = FALSE,
    tol = 0
  )

  # Name the components SVD1, SVD2, ...
  axis_labels <- paste0("SVD", 1:ncol(decomposition$rotation))
  colnames(decomposition$rotation) <- axis_labels

  # Eigenvalues are the squared standard deviations reported by prcomp().
  ENARotationSet$new(
    rotation = decomposition$rotation,
    codes = enaset$codes,
    node.positions = NULL,
    eigenvalues = decomposition$sdev^2
  )
}

###
#' Calculate the correlations
#'
#' @description Calculate both Spearman and Pearson correlations for the
#' provided ENAset
#'
#' @param enaset ENAset to run correlations on
#' @param dims The dimensions to calculate the correlations for. Default: c(1,2)
#'
#' @return A data.frame with 2 columns, one for each correlation method (pearson, spearman),
#' with the corresponding correlations per dimension as the rows.
#'
#' @export
###
ena.correlations <- function(enaset, dims = c(1:2)) {
  # Every unordered pair of rows, as two parallel index vectors.
  pComb <- combn(nrow(enaset$points), 2)
  point1 <- pComb[1, ]
  point2 <- pComb[2, ]

  points <- as.matrix(enaset$points)
  centroids <- as.matrix(enaset$model$centroids)

  # Pairwise differences on the requested dimensions; column k of each matrix
  # corresponds to dims[k].
  svdDiff <- matrix(points[point1, dims] - points[point2, dims], ncol = length(dims), nrow = length(point1))
  optDiff <- matrix(centroids[point1, dims] - centroids[point2, dims], ncol = length(dims), nrow = length(point1))

  # Index the difference matrices by position within `dims`, not by the
  # dimension number itself, so e.g. dims = c(2, 3) reads columns 1 and 2.
  correlations <- as.data.frame(mapply(function(method) {
    lapply(seq_along(dims), function(k) {
      cor(as.numeric(svdDiff[, k]), as.numeric(optDiff[, k]), method = method)
    })
  }, c("pearson", "spearman")))

  return(correlations)
}


#' @title ENA Rotate by SVD (Principal Components)
#'
#' @description Runs an uncentered, unscaled principal-components
#'   decomposition (\code{prcomp}) on the ENA points and returns its loadings
#'   as the rotation. This is the rotation method used by default in Ordered
#'   Network Analysis (ONA). Unlike the generalized means rotation, no group
#'   labels are required.
#'
#' @param enaset An \code{\link{ENAset}} or compatible list with a
#'   \code{model$points.for.projection} matrix and \code{rotation$codes}.
#' @param params A list of additional parameters (currently unused; kept for
#'   interface compatibility with other rotation functions).
#'
#' @return A list with:
#'   \describe{
#'     \item{rotation}{Rotation matrix (connection columns × dimensions),
#'       with columns named \code{SVD1}, \code{SVD2}, …}
#'     \item{codes}{\code{enaset$rotation$codes}, passed through unchanged}
#'     \item{eigenvalues}{Squared standard deviation of each component}
#'     \item{node.positions}{NULL (not computed here)}
#'   }
#'
#' @export
ena.rotate.by.svd <- function(enaset, params = list()) {
  projection_points <- as.matrix(enaset$model$points.for.projection)

  decomposition <- prcomp(
    projection_points,
    retx = FALSE,
    center = FALSE,
    scale. = FALSE,
    tol = 0
  )

  loadings <- decomposition$rotation
  colnames(loadings) <- paste0("SVD", seq_len(ncol(loadings)))

  list(
    rotation       = loadings,
    codes          = enaset$rotation$codes,
    eigenvalues    = decomposition$sdev^2,
    node.positions = NULL
  )
}

#' ENARotationSet R6class
#
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export
#
#' @field rotation Rotation as supplied to the constructor
#' @field node.positions Node positions as supplied to the constructor; their
#'   row names are set to \code{codes} when both are non-NULL
#' @field codes Codes as supplied to the constructor
#' @field eigenvalues Eigenvalues as supplied to the constructor (may be NULL)
ENARotationSet <- R6::R6Class(
  "ENARotationSet",
  public = list(

    ## Public Functions ----
      #' Create ENARotationSet
      #'
      #' @param rotation Value stored in the \code{rotation} field
      #' @param codes Value stored in the \code{codes} field
      #' @param node.positions Value stored in the \code{node.positions} field
      #' @param eigenvalues Value stored in the \code{eigenvalues} field
      #'
      #' @return ENARotationSet
      initialize = function(
        rotation,
        codes,
        node.positions,
        eigenvalues = NULL
      ) {
        self$node.positions <- node.positions
        self$rotation <- rotation
        self$codes <- codes

        # Label the node rows by code only when both pieces are available.
        can_label_nodes <- !is.null(codes) && !is.null(self$node.positions)
        if (can_label_nodes) {
          rownames(self$node.positions) <- codes
        }

        self$eigenvalues <- eigenvalues
      },

    ## Public Properties ----
      rotation = NULL,
      node.positions = NULL,
      codes = NULL,
      eigenvalues = NULL
  ),
  private = list(
    ## Private Properties ----
      args = NULL
  )
)

##
#' Cohen's d calculation
#'
#' @title Cohen's d
#'
#' @description Calculate Cohen's d
#'
#' @details The absolute difference between the two sample means is divided
#' by the pooled standard deviation, where each sample variance is weighted
#' by its degrees of freedom (\code{length - 1}).
#'
#' @param x numeric vector, first sample
#' @param y numeric vector, second sample
#'
#' @export
#' @return numeric Cohen's d calculation
fun_cohens.d <- function(x, y) {
  df_x <- length(x) - 1
  df_y <- length(y) - 1

  # Pooled (common) standard deviation of the two samples.
  weighted_variance <- df_x * var(x) + df_y * var(y)
  pooled_sd <- sqrt(weighted_variance / (df_x + df_y))

  # Unsigned mean difference expressed in pooled-sd units.
  abs(mean(x) - mean(y)) / pooled_sd
}

# Namespace load hook: registers a fixed list of names through
# globalVariables(). Presumably this silences "no visible binding" notes for
# symbols used in non-standard evaluation -- confirm against R CMD check.
.onLoad <- function(libname, pkgname) {
  known_globals <- c(
    ".", "ENA_ROW_IDX", "ENA_UNIT", "V1", "V2", "V3",
    "ci.x", "ci.y", "e", "handle", "name", "unit.groups",
    "V", "graph_from_data_frame", "|>", "X1", "X2",
    "dfDT.points", "points.raw", "lines", "KEYCOL", "ENA_CONV",
    "..groupCol", "..units", "..metadata", "..codes", "..conversation",
    "ENA_GROUP_NAME",
    "label.font.color", "label.font.family", "label.font.size",
    "label.offset", "legend.include.edges", "legend.name",
    "network.edges.shapes", "nodes", "rows.to.keep", "show.legend",
    "..connection_name", "..dimension_names", "..first_meta",
    "count", "window", "entropy", "ground_type", "n_obs", "n_unique",
    "method", "n_conversations", ".group"
  )
  globalVariables(known_globals)

  # Disabled package-option registration, kept for reference:
#   op <- options()
#   op.rENA <- list(
#     UNIT_NAMES = "ena.unit.names",
#     TRAJ_TYPES = c("accumulated","non-accumulated")
#   );
#
#   toset <- !("rENA" %in% names(op))
#   print(paste("ToSet:", toset));
#
#   if(toset) {
#     options(rENA = op.rENA)
#   }
#
#   invisible()
}

# Attach hook: emits a single startup message pointing at the project's
# package repository and returns NULL invisibly.
.onAttach <- function(libname, pkgname) {
  startup_note <- "For the latest features and updates, install from https://cran.qe-libs.org"
  packageStartupMessage(startup_note)
  invisible(NULL)
}

###
#' @title ENA Rotate by generalized means rotation (GMR)
#'
#' @description Computes a dimensional reduction from a matrix of ENA points
#' such that the first dimension best represents the contribution of a target
#' variable after controlling for covariates via Lasso. An optional second
#' GMR axis can be computed for \code{y_var}; remaining dimensions are filled
#' by SVD of the doubly-deflated space. Delegates to
#' \code{\link[libqe]{generalized_means_rotation}}.
#'
#' @param enaset An \code{\link{ENAset}} or compatible list with
#' \code{points.normed.centered} (used when non-NULL) or
#' \code{model$points.for.projection}, plus \code{line.weights} and
#' \code{rotation$codes}.
#' @param params A list with the following named elements:
#' \describe{
#' \item{\code{x_var}}{Required. A \code{data.frame} (or character vector of
#' column names in \code{enaset$meta.data}) whose first column is the
#' target variable. Additional columns are treated as covariates and
#' penalized via Lasso.}
#' \item{\code{y_var}}{Optional. Same format as \code{x_var}. When provided
#' a second GMR axis is computed.}
#' \item{\code{select_2_groups}}{Optional length-2 list/vector of group
#' labels. When given, the GMR fit for the x axis uses only rows whose
#' target value is in these two groups. The group mean difference for x1
#' (the secondary axis that keeps group means on the x-axis) is always
#' computed from the full data.}
#' \item{\code{interactions}}{Logical; if \code{TRUE} (default) pairwise
#' interaction terms are added to the model matrix when covariates are
#' present. Set \code{FALSE} for main-effects-only Lasso.}
#' }
#'
#' @importFrom libqe generalized_means_rotation
#' @importFrom stats model.matrix as.formula
#' @export
#' @return A list with \code{rotation} (q x q matrix, column names GMR1,
#' GMR2|SVD2, SVD3, …), \code{codes}, \code{eigenvalues}, and
#' \code{node.positions = NULL}, suitable for use inside \code{rotate()}.
###
ena.rotate.by.generalized <- function(enaset, params) {

  ## ── Input validation ────────────────────────────────────────────────────────
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  ## 0-based positions of the model-matrix columns that belong to the target
  ## variable: names starting with the (regex-escaped) target name and
  ## containing no ":" so interaction columns are excluded. Falls back to
  ## column 0 when nothing matches.
  target_cols <- function(target_name, mm_colnames) {
    safe_name <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", target_name)
    name_regex <- paste0("^", safe_name, "[^:]*$")
    cols <- as.integer(grep(name_regex, mm_colnames) - 1L)
    if (length(cols) == 0L) 0L else cols
  }

  ## ── Resolve x_var → data.frame ──────────────────────────────────────────────
  if (is.data.frame(params$x_var)) {
    x <- params$x_var
  } else if (all(params$x_var %in% colnames(enaset$meta.data))) {
    x <- enaset$meta.data[, params$x_var, with = FALSE]
  } else {
    stop(paste("x_var incorrect:", paste(params$x_var, collapse = ", ")))
  }

  ## ── ENA point matrix ────────────────────────────────────────────────────────
  V <- if (!is.null(enaset$points.normed.centered)) {
    as.matrix(enaset$points.normed.centered)
  } else {
    as.matrix(enaset$model$points.for.projection)
  }

  ## ── Target variable & encoding ──────────────────────────────────────────────
  ## For categorical targets, encode as 0-based integer codes.
  ## When select_2_groups is provided, the two selected groups are encoded as
  ## 0 and 1 (required by the C++ x1 computation, which uses labels == 0/1).
  target_full <- as.vector(x[[1]])
  x_categorical <- !is.numeric(target_full)
  grp <- params$select_2_groups
  has_two_groups <- !is.null(grp) && length(grp) == 2

  if (x_categorical) {
    if (has_two_groups) {
      all_levels <- c(
        grp[[1]], grp[[2]],
        setdiff(unique(target_full), c(grp[[1]], grp[[2]]))
      )
    } else {
      all_levels <- unique(target_full)
    }
    x_target_enc <- as.numeric(factor(target_full, levels = all_levels)) - 1.0
    x_n_groups <- as.integer(length(all_levels))
  } else {
    x_target_enc <- as.numeric(target_full)
    x_n_groups <- 0L
  }

  ## ── Row subset (select_2_groups → 0-based integer indices) ──────────────────
  x_subset <- integer(0)
  if (has_two_groups) {
    subset_rows <- which(target_full %in% params$select_2_groups)
    if (length(subset_rows) < 2L) {
      warning("select_2_groups produced < 2 matching rows; using all rows")
    } else {
      x_subset <- as.integer(subset_rows - 1L)
    }
  }

  ## ── Model matrix for x ──────────────────────────────────────────────────────
  ## Interaction terms are included by default when covariates are present.
  interactions <- is.null(params$interactions) || isTRUE(params$interactions)
  fstr_x <- if (ncol(x) > 1L && interactions) "~ .^2" else "~ ."
  mm_x <- model.matrix(as.formula(fstr_x), data = x)[, -1L, drop = FALSE]

  ## x1_cols (0-based): columns in mm_x that belong to the target variable
  ## (main-effect columns only; interaction columns stay penalized)
  x1_cols <- target_cols(colnames(x)[1L], colnames(mm_x))

  ## ── Y axis ──────────────────────────────────────────────────────────────────
  has_y <- !is.null(params$y_var)

  if (has_y) {
    if (is.data.frame(params$y_var)) {
      y <- params$y_var
    } else if (all(params$y_var %in% colnames(enaset$meta.data))) {
      y <- enaset$meta.data[, params$y_var, with = FALSE]
    } else {
      stop("y_var must be a data.frame or a column name in enaset$meta.data")
    }

    y_target_raw <- as.vector(y[[1]])
    y_categorical <- !is.numeric(y_target_raw)
    if (y_categorical) {
      y_levels <- unique(y_target_raw)
      y_target_enc <- as.numeric(factor(y_target_raw, levels = y_levels)) - 1.0
      y_n_groups <- as.integer(length(y_levels))
    } else {
      y_target_enc <- as.numeric(y_target_raw)
      y_n_groups <- 0L
    }

    fstr_y <- if (ncol(y) > 1L && interactions) "~ .^2" else "~ ."
    mm_y <- model.matrix(as.formula(fstr_y), data = y)[, -1L, drop = FALSE]
    y1_cols <- target_cols(colnames(y)[1L], colnames(mm_y))
  } else {
    ## Dummy y params — passed but ignored by the C++ when has_y = FALSE
    mm_y <- matrix(0.0, nrow(V), 1L)
    y_target_enc <- numeric(nrow(V))
    y1_cols <- 0L
    y_categorical <- FALSE
    y_n_groups <- 0L
  }

  ## ── Delegate to libqe ───────────────────────────────────────────────────────
  result <- libqe::generalized_means_rotation(
    V = V,
    x_model_matrix = mm_x,
    x_target = x_target_enc,
    x1_cols = x1_cols,
    x_categorical = x_categorical,
    x_n_groups = x_n_groups,
    x_subset = x_subset,
    has_y = has_y,
    y_model_matrix = mm_y,
    y_target = y_target_enc,
    y1_cols = y1_cols,
    y_categorical = y_categorical,
    y_n_groups = y_n_groups
  )

  ## ── Assemble rotation matrix ─────────────────────────────────────────────────
  rotation <- result$rotation
  colnames(rotation) <- result$column_names
  rownames(rotation) <- colnames(as.matrix(enaset$line.weights))

  list(
    node.positions = NULL,
    rotation = rotation,
    codes = enaset$rotation$codes,
    eigenvalues = result$eigenvalues
  )
}

# Assembles an S3 "ena.set" list from either an ENAdata accumulation object or
# an existing set-like object that carries one in `x$enadata`.
#
# x: an object whose class includes "ENAdata", or a list with `enadata` plus
#    the rotated results (line.weights, points.rotated, rotation.set,
#    centroids, node.positions, ...).
#
# Returns the list with connection.counts, meta.data, model and rotation
# filled in; line weights, points and rotation details are copied only when
# `x` was a full set. Columns are tagged in place with data.table::set(), so
# statement order matters here.
ena.set <- function(x) {
  ena_set <- list()
  class(ena_set) <- c("ena.set", class(ena_set))

  # A bare ENAdata object is wrapped so both input forms read x$enadata.
  from_full_set <- TRUE
  if ("ENAdata" %in% class(x)) {
    x <- list(enadata = x)
    from_full_set <- FALSE
  }
  connection_names <- apply(x$enadata$adjacency.matrix, 2, paste, collapse = " & ")

  ena_set$connection.counts <- x$enadata$adjacency.vectors
  colnames(ena_set$connection.counts) <- connection_names
  for (i in seq(ncol(ena_set$connection.counts))) {
    set(ena_set$connection.counts, j = i,
        value = as.ena.co.occurrence(ena_set$connection.counts[[i]]))
  }

  # Trajectory models take their unit metadata from the trajectory table.
  if (grepl(x = x$enadata$model, pattern = "Traj", ignore.case = TRUE)) {
    ena_set$meta.data <- data.table::copy(x$enadata$trajectories$units)
    ena_set$meta.data[, ENA_UNIT := apply(x$enadata$trajectories$units, 1, paste, collapse = "::")]

    ena_set$trajectories <- cbind(ena_set$meta.data, x$enadata$trajectories$step)
    for (i in seq(ncol(ena_set$trajectories))) {
      set(ena_set$trajectories, j = i,
          value = as.ena.metadata(ena_set$trajectories[[i]]))
    }
  } else {
    ena_set$meta.data <- x$enadata$metadata
  }

  if (!is.null(ena_set$meta.data) && ncol(ena_set$meta.data) > 0) {
    for (i in seq(ncol(ena_set$meta.data))) {
      set(ena_set$meta.data, j = i,
          value = as.ena.metadata(ena_set$meta.data[[i]]))
    }
  }
  ena_set$meta.data <- as.ena.matrix(ena_set$meta.data)

  if (from_full_set) {
    ena_set$line.weights <- as.data.table(cbind(x$enadata$metadata, x$line.weights))
    to_cols <- names(which(!find_meta_cols(ena_set$line.weights)))
    for (col in to_cols) {
      set(x = ena_set$line.weights, j = col,
          value = as.ena.co.occurrence(ena_set$line.weights[[col]]))
    }
    class(ena_set$line.weights) <- c("ena.line.weights", class(ena_set$line.weights))

    ena_set$points <- cbind(x$enadata$metadata, x$points.rotated)
    to_cols <- names(which(!find_meta_cols(ena_set$points)))
    for (col in to_cols) {
      set(x = ena_set$points, j = col,
          value = as.ena.dimension(ena_set$points[[col]]))
    }
    ena_set$points <- as.ena.matrix(ena_set$points, "ena.points")

    ena_set$rotation.matrix <- x$rotation.set$rotation
  }

  ena_set$connection.counts <- cbind(ena_set$meta.data, ena_set$connection.counts)
  class(ena_set$connection.counts) <- c(
    "ena.connections", class(ena_set$connection.counts)
  )

  accumulated <- x$enadata$accumulated.adjacency.vectors
  ena_set$model <- list(
    model.type = x$enadata$model,
    raw.input = x$enadata$raw,
    row.connection.counts = accumulated[, unique(names(accumulated)), with = FALSE],
    unit.labels = x$enadata$unit.names
  )

  #####
  # if(quote(x$enadata$function.params$weight.by) != "binary") {
  #   newset$model$unweighted.connection.counts <- x$enadata$adjacency.vectors.raw
  #   class(newset$model$unweighted.connection.counts) <- c("ena.connections",
  #     class(newset$model$unweighted.connection.counts))
  #   are.codes <- find_code_cols(newset$model$unweighted.connection.counts)
  #   for (i in seq(are.codes)) {
  #     if (are.codes[i]) {
  #       set(newset$model$unweighted.connection.counts, j = i,
  #         value = as.ena.co.occurrence(
  #           newset$model$unweighted.connection.counts[[i]]
  #         )
  #       )
  #     } else {
  #       set(newset$model$unweighted.connection.counts, j = i,
  #         value = as.ena.metadata(
  #           newset$model$unweighted.connection.counts[[i]]
  #         )
  #       )
  #     }
  #   }
  # }
  #####

  # Tag the row-level columns: connections, then metadata, then raw codes.
  cols <- grep("adjacency.code", colnames(ena_set$model$row.connection.counts))
  colnames(ena_set$model$row.connection.counts)[cols] <- connection_names
  for (i in cols) {
    set(ena_set$model$row.connection.counts, j = i,
        value = as.ena.co.occurrence(ena_set$model$row.connection.counts[[i]]))
  }
  for (i in which(colnames(ena_set$model$row.connection.counts)
                  %in% colnames(ena_set$meta.data))) {
    set(ena_set$model$row.connection.counts, j = i,
        value = as.ena.metadata(ena_set$model$row.connection.counts[[i]]))
  }
  for (i in which(colnames(ena_set$model$row.connection.counts) %in%
                  x$enadata$codes)) {
    set(ena_set$model$row.connection.counts, j = i,
        value = as.ena.code(ena_set$model$row.connection.counts[[i]]))
  }
  class(ena_set$model$row.connection.counts) <- c(
    "row.connections", class(ena_set$model$row.connection.counts)
  )

  if (from_full_set) {
    ena_set$model$centroids <- x$centroids
    ena_set$model$correlations <- x$correlations
    ena_set$model$function.call <- x$function.call
    ena_set$model$function.params <- x$function.params
    ena_set$model$points.for.projection <- cbind(
      x$enadata$metadata, x$points.normed.centered
    )
    ena_set$model$variance <- x$variance
    names(ena_set$model$variance) <- colnames(ena_set$rotation.matrix)
  }

  ena_set$rotation <- list(
    adjacency.key = as.data.table(x$enadata$adjacency.matrix),
    codes = x$enadata$codes
  )
  class(ena_set$rotation) <- c("ena.rotation.set", class(ena_set$rotation))

  for (i in seq(ncol(ena_set$rotation$adjacency.key))) {
    set(ena_set$rotation$adjacency.key, j = i,
        value = as.ena.codes(ena_set$rotation$adjacency.key[[i]]))
  }

  if (from_full_set) {
    ena_set$rotation$eigenvalues <- x$rotation.set$eigenvalues
    ena_set$rotation$nodes <- x$node.positions
    ena_set$rotation$rotation.matrix <- x$rotation.set$rotation
  }

  # Record the outermost call and, if any calling frame defines
  # `window.size.back`, a snapshot of that frame's variables.
  ena_set$`_function.call` <- sys.calls()[[1]]
  has_window_arg <- sapply(sys.frames(), function(f) {
    "window.size.back" %in% ls(envir = f)
  })
  if (any(has_window_arg)) {
    origin_frame <- sys.frame(which(has_window_arg))
    ena_set$`_function.params` <- mget(ls(envir = origin_frame),
                                       envir = origin_frame)
  } else {
    ena_set$`_function.params` <- list()
  }

  return(ena_set)
}

#' ENA line weights as matrix
#'
#' @param x ena.line.weights data.table to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#' @param square Logical. If TRUE, return a list holding one square
#'   code-by-code matrix per row of \code{x}, with that row's values placed
#'   in the upper triangle. If FALSE (default), return a single matrix of the
#'   non-metadata columns.
#'
#' @return matrix, or a list of square matrices when \code{square = TRUE}
#' @export
as.matrix.ena.line.weights <- function(x, ..., square = FALSE) {
  x.unclass <- data.table::as.data.table(unclass(x))
  rows <- x.unclass[, !find_meta_cols(x.unclass), with = FALSE]

  if (square) {
    upperTriSize <- ncol(rows)
    # With k codes there are k * (k - 1) / 2 connection columns; this recovers
    # k from that count (assumes the columns are exactly all code pairs).
    number <- (ceiling(sqrt(2 * upperTriSize))^2) - (2 * upperTriSize)
    codes <- unique(unlist(sapply(colnames(rows), strsplit, split = " & ")))
    cm <- lapply(seq_len(nrow(rows)), function(unit) {
      m <- matrix(NA, number, number, dimnames = list(codes, codes))
      m[upper.tri(m)] <- as.numeric(rows[unit, ])
      m
    })
    return(cm)
  }

  as.matrix(remove_meta_data(rows), ...)
}
34
#' ENA rotations as matrix
#'
#' @param x ena.rotation.matrix to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.rotation.matrix <- function(x, ...) {
  # Drop the leading class so as.matrix() dispatches to the next method.
  class(x) <- class(x)[-1]
  without_meta <- remove_meta_data(x)
  as.matrix(without_meta, ...)
}
47
#' ENA points as matrix
#'
#' @param x ena.points to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.points <- function(x, ...) {
  # Drop the leading class so as.matrix() dispatches to the next method.
  class(x) <- class(x)[-1]
  dimensions_only <- remove_meta_data(x)
  as.matrix(dimensions_only, ...)
}
60
#' Matrix without metadata
#'
#' @param x Object to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix
#' @export
as.matrix.ena.matrix <- function(x, ...) {
  # Drop the leading class so as.matrix() dispatches to the next method.
  class(x) <- class(x)[-1]
  stripped <- remove_meta_data(x)
  as.matrix(stripped, ...)
}
73
#' ENA nodes as matrix
#'
#' @param x ena.nodes to convert to matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix of every column except \code{code}
#' @export
as.matrix.ena.nodes <- function(x, ...) {
  # Drop the leading class so as.matrix() dispatches to the next method.
  class(x) <- class(x)[-1]
  positions <- x[, -c("code")]
  as.matrix(positions, ...)
}
85
#' ENA row connections as matrix
#'
#' @param x ena.row.connections to convert to a matrix
#' @param ... additional arguments to be passed to or from methods
#'
#' @return matrix of the columns that are \code{ena.co.occurrence}
#' @export
as.matrix.row.connections <- function(x, ...) {
  # Drop the leading class so as.matrix() dispatches to the next method.
  class(x) <- class(x)[-1]
  is_connection <- sapply(x, is, class2 = "ena.co.occurrence")
  as.matrix(x[, is_connection, with = FALSE], ...)
}
97
98
#' ENA Connections as a matrix
#'
#' @param x ena.connections object
#' @param ... additional arguments to be passed to or from methods. Three
#'   named entries are read here: \code{square} (logical; if TRUE each row is
#'   converted to a square matrix), \code{simplify} (logical; if TRUE only the
#'   first square matrix is returned; defaults to TRUE when \code{x} has at
#'   most one row) and \code{names} (used as the names of the returned list,
#'   or as the row names of the returned matrix).
#'
#' @return If square is FALSE (default), a matrix with all metadata columns removed, otherwise a list with square matrices
#' @export
as.matrix.ena.connections <- function(x, ...) {
  class(x) <- class(x)[-1]
  rows <- as.data.frame(x)[, !find_meta_cols(x), drop = FALSE]

  args <- list(...)
  square <- if (is.null(args$square)) FALSE else args$square
  names <- args$names
  simplify <- if (is.null(args$simplify)) !(nrow(x) > 1) else args$simplify

  if (square) {
    upperTriSize <- ncol(rows)
    # With k codes there are k * (k - 1) / 2 connection columns; this recovers
    # k from that count (assumes the columns are exactly all code pairs).
    number <- (ceiling(sqrt(2 * upperTriSize))^2) - (2 * upperTriSize)
    codes <- unique(unlist(sapply(colnames(rows), strsplit, split = " & ")))
    cm <- lapply(seq_len(nrow(rows)), function(unit) {
      m <- matrix(NA, number, number, dimnames = list(codes, codes))
      m[upper.tri(m)] <- as.numeric(rows[unit, ])
      m
    })

    if (simplify) {
      cm <- cm[[1]]
    } else {
      names(cm) <- names
    }
  } else {
    cm <- as.matrix(rows)
    rownames(cm) <- names
  }

  cm
}

#####
#'
#' @title Wrapper to generate an ENA model
#'
#' @description Generates an ENA model by constructing a dimensional reduction
#' of adjacency (co-occurrence) vectors as defined by the supplied
#' conversations, units, and codes.
#'
#' @details This function generates an ena.set object given a data.frame, units,
#' conversations, and codes. After accumulating the adjacency (co-occurrence)
#' vectors, computes a dimensional reduction (projection), and calculates node
#' positions in the projected ENA space. Returns location of the units in the
#' projected space, as well as locations for node positions, and normalized
#' adjacency (co-occurrence) vectors to construct network graphs. Includes options
#' for returning statistical tests between groups of units.
#'
#' @param data data.frame with containing metadata and coded columns
#' @param codes vector, numeric or character, of columns with codes
#' @param units vector, numeric or character, of columns representing units
#' @param conversation vector, numeric or character, of columns to segment conversations by
#' @param metadata vector, numeric or character, of columns with additional meta information for units
#' @param model character, the ENA model to construct: \code{EndPoint} (default) produces a single adjacency vector per unit summing co-occurrences across all lines; \code{AccumulatedTrajectory} produces one adjacency vector per unit per conversation, where each successive conversation accumulates prior ones; \code{SeparateTrajectory} produces one adjacency vector per unit per conversation, each modeled independently
#' @param weight.by "binary" is default, can supply a function to call (e.g. sum)
#' @param window MovingStanzaWindow (default) or Conversation
#' @param window.size.back integer, number of lines back from each line to include in the stanza window (default: 1)
#' @param window.size.forward integer, number of lines forward from each line to include in the stanza window (default: 0). Set to model bidirectional co-occurrence within a window.
#' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
#' @param groupVar vector, character, of column name containing group identifiers.
#' If column contains at least two unique values, will generate model using a means rotation (a dimensional reduction maximizing the variance between the means of the two groups)
#' @param groups vector, character, of values of groupVar column used for means rotation or statistical tests
#' @param runTest logical, TRUE will run a Student's t-Test and a Wilcoxon test for groups defined by the groups argument
#' @param ... Additional parameters passed to model generation, including \code{mask} (an optional binary matrix of size ncol(codes) x ncol(codes) where 0 suppresses co-occurrence modeling between a pair of codes; see \code{\link{ena.accumulate.data}})
#'
#'
#' @return ena.set object
#####
ena.set.creator <- function(
  data,
  codes,
  units,
  conversation,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  include.meta = TRUE,
  groupVar = NULL,
  groups = NULL,
  runTest = FALSE,
  ...
) {
  # NOTE: local variable names are kept stable on purpose. ena.set() looks for
  # a calling frame that defines `window.size.back` and stores that frame's
  # variables in the set's `_function.params`.
  data <- data.table::data.table(data)

  model <- match.arg(model)
  window <- match.arg(window)
  accum <- ena.accumulate.data(
    units = data[, ..units, drop = FALSE],
    conversation = data[, ..conversation, drop = FALSE],
    metadata = data[, ..metadata, drop = FALSE],
    codes = data[, ..codes, drop = FALSE],
    window = window,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    include.meta = include.meta,
    ...
  )

  accum$model$raw.input <- as.data.table(data)
  accum$model$raw.input$ENA_UNIT <- merge_columns_c(accum$model$raw.input, units)
  group1 <- NULL
  group2 <- NULL
  group1.rows <- NULL
  group2.rows <- NULL

  set_params <- list(...)
  set_params$enadata <- accum

  if (is.null(groupVar)) {
    ### make set if no group column is specified
    if (runTest == TRUE) {
      warning("Group variable and groups not specified. Unable to run test")
    }
  } else if (is.null(groups)) {
    ### make set if group column is specified, but groups are not
    unique.groups <- unique(as.character(data[[groupVar]]))

    if (length(unique.groups) == 1) {
      warning("Group variable only contains one unique value. ENAset has been created without means rotation")

      if (runTest == TRUE) {
        warning("Multiple groups not specified. Unable to run test")
      }
    } else {
      group1 <- unique.groups[1]
      group2 <- unique.groups[2]

      message(paste0("No groups specified. Defaulting to means rotation using first two unique group values of group variable: ",group1," and ",group2))

      # The means rotation itself is configured once, below, for every branch
      # that ends up with two groups.
      if (runTest == TRUE) {
        warning(paste0("No groups specified. Running test on the first two unique group values of the group variable: ",group1," and ",group2))
      }
    }
  } else if (length(groups) == 1) {
    message("Only one group value specified. ENAset has been created without means rotation")

    if (runTest == TRUE) {
      warning("Multiple groups not specified. Unable to run test")
    }
  } else {
    group1 <- groups[1]
    group2 <- groups[2]

    if (length(groups) > 2) {
      warning(paste0("Only two groups are allowed for means rotation. ENAset has been created using a means rotation on the first two groups given: ",group1," and ",group2))
    }

    groups.missing <- groups[which(!groups %in% data[[groupVar]])]
    if (length(groups.missing) > 0) {
      # Collapse first: pasting the vector directly would repeat the whole
      # message once per missing value.
      stop(paste(
        "Group column does not contain supplied group value(s): ",
        paste(groups.missing, collapse = ", ")
      ))
    }

    if (runTest == TRUE) {
      if (length(groups) > 2) {
        warning(paste0("More than two groups specified. Running test on the first two groups: ",group1," and ",group2))
      }
    }
  }

  # Two groups resolved: rotate by the difference of their means.
  if (!any(is.null(c(group1, group2)))) {
    set_params$rotation.by <- ena.rotate.by.mean
    set_params$rotation.params <- list(
      accum$meta.data[[groupVar]] == group1,
      accum$meta.data[[groupVar]] == group2
    )

    group1.rows <- accum$meta.data[[groupVar]] == group1
    group2.rows <- accum$meta.data[[groupVar]] == group2
  }

  set <- do.call(ena.make.set, set_params)

  if (
    runTest == TRUE &&
    !any(is.null(c(group1.rows, group2.rows)))
  ) {
    # Compare the two groups on the first two dimensions of the points.
    group1.dim1 <- as.matrix(set$points)[group1.rows, 1]
    group2.dim1 <- as.matrix(set$points)[group2.rows, 1]
    group1.dim2 <- as.matrix(set$points)[group1.rows, 2]
    group2.dim2 <- as.matrix(set$points)[group2.rows, 2]

    set$tests <- list(
      wilcox.test = list(
        test.dim1 = wilcox.test(x = group1.dim1, y = group2.dim1),
        test.dim2 = wilcox.test(x = group1.dim2, y = group2.dim2)
      ),
      t.test = list(
        test.dim1 = t.test(x = group1.dim1, y = group2.dim1),
        test.dim2 = t.test(x = group1.dim2, y = group2.dim2)
      )
    )
  } else {
    set$tests <- NULL
  }

  return(set)
}

1		#' Rescale ENA node positions onto a common circle and recompute centroids
2		#'
		#' Each node with a non-zero length on the two chosen dimensions is scaled
		#' by (largest node length / its own length), so the circle's radius is the
		#' largest existing node length, not necessarily 1. Centroids are then
		#' rebuilt from the connection counts and the moved nodes.
		#'
3		#' @param set ENA set; reads \code{rotation$nodes}, \code{rotation$codes} and \code{connection.counts}
4		#' @param dimension_name_1 name of the first node-position column; defaults to the first column name of \code{as.matrix(set$rotation$nodes)}
5		#' @param dimension_name_2 name of the second node-position column; defaults to the second column name of \code{as.matrix(set$rotation$nodes)}
6		#'
7		#' @return \code{set} with \code{rotation$nodes} and \code{model$centroids} updated
8		#' @export
9		move_nodes_to_unit_circle<-function(
10		set,
11		dimension_name_1 = colnames(as.matrix(set$rotation$nodes))[1],
12		dimension_name_2 = colnames(as.matrix(set$rotation$nodes))[2]
13		) {
14		# get node position on the specified two dimensions
		# NOTE(review): the `..dimension_names` prefix presumably requires
		# set$rotation$nodes to be a data.table -- confirm.
15	!	dimension_names = c(dimension_name_1,dimension_name_2)
16	!	node_position = set$rotation$nodes[,..dimension_names]
17		# compute the length of each node position vector on the two dimensional plane
18	!	length_list = sqrt(node_position[,1]^2+node_position[,2]^2)
19		# compute the re-scaling coefficient for each non-zero node vector:
		# largest length divided by the node's own length (zero-length nodes are skipped)
20	!	non_zero_lengths = which(length_list!=0)
21	!	length_list[non_zero_lengths] = max(length_list)/length_list[non_zero_lengths]
22		# move nodes to the circle by multiplying both coordinates by the coefficient
23	!	for(i in non_zero_lengths)
24		{
25	!	set$rotation$nodes[[dimension_name_1]][i]=as.numeric(set$rotation$nodes[[dimension_name_1]][i]*length_list[i])
26	!	set$rotation$nodes[[dimension_name_2]][i]= as.numeric(set$rotation$nodes[[dimension_name_2]][i]*length_list[i])
27		}
28		# compute the node weights so that the centroids could be computed:
		# one row per row of connection.counts, one column per code
29	!	codeNames = set$rotation$codes
30	!	row_counts = set$connection.counts
31	!	node_weights = data.frame(matrix(0,nrow=nrow(row_counts),ncol=length(codeNames)))
32
		# every code pair "A & B" contributes half of its count to A and half to B
33	!	for(i in 1:(length(codeNames)-1))
34		{
35	!	for(j in (i+1):length(codeNames))
36		{
37	!	connection_name = paste0(codeNames[i]," & ",codeNames[j])
38	!	x = row_counts[,..connection_name]/2
39	!	node_weights[,i]=node_weights[,i]+x
40	!	node_weights[,j]=node_weights[,j]+x
41		}
42		}
		# normalise each row with a non-zero total so its weights sum to 1
43	!	rs = rowSums(node_weights)
44	!	rs_1 = which(rs!=0)
45	!	node_weights[rs_1,]=node_weights[rs_1,]/rs[rs_1]
46		# finally, recompute centroids as node weights times node positions
47	!	centroids = as.matrix(node_weights)%*%as.matrix(set$rotation$nodes)
		# column j of the product goes to column j + 1 of set$model$centroids
		# (presumably skipping a leading identifier column -- TODO confirm)
48	!	for(j in 1:ncol(centroids))
49		{
50	!	set$model$centroids[,j+1] = centroids[,j]
51		}
52	!	return(set)
53		}
54
55
#' Place ENA nodes on a circle at equal angular spacing
#'
#' Nodes whose position on the two chosen dimensions is not the origin are
#' ordered around the circle (upper half-plane by decreasing first dimension,
#' then lower half-plane by increasing first dimension). The node with the
#' largest distance from the origin keeps its position; every following node in
#' that order is placed by rotating the fixed node's coordinates by a further
#' `2 * pi / k`, where `k` is the number of non-zero nodes. Nodes located at
#' the origin are left untouched. Afterwards `set$model$centroids` is
#' recomputed from the connection counts and the moved node positions.
#'
#' @param set An ENA set providing `rotation$nodes`, `rotation$codes`,
#'   `connection.counts` and `model$centroids`.
#' @param dimension_name_1 Name of the node column used as the first
#'   dimension. Defaults to the first column name of
#'   `as.matrix(set$rotation$nodes)`.
#' @param dimension_name_2 Name of the node column used as the second
#'   dimension. Defaults to the second column name of
#'   `as.matrix(set$rotation$nodes)`.
#'
#' @return `set`, with `rotation$nodes` and `model$centroids` updated.
#' @export
move_nodes_to_unit_circle_with_equal_space <- function(
    set,
    dimension_name_1 = colnames(as.matrix(set$rotation$nodes))[1],
    dimension_name_2 = colnames(as.matrix(set$rotation$nodes))[2]
) {
  # get node position on the specified two dimensions
  # NOTE(review): the `..name` prefix is data.table syntax, so
  # `set$rotation$nodes` is presumably a data.table -- confirm.
  dimension_names <- c(dimension_name_1, dimension_name_2)
  node_position <- set$rotation$nodes[, ..dimension_names]

  # compute the length of each node position vector on the two dimensional plane
  length_list <- sqrt(node_position[, 1]^2 + node_position[, 2]^2)

  # find non-zero node positions
  non_zero_lengths <- which(length_list != 0)
  node_position_non_zero <- node_position[non_zero_lengths, ]

  # divide the angle
  rotation_angle <- 2 * pi / nrow(node_position_non_zero)

  # order the nodes along the circle; `id` remembers the original order
  node_position_non_zero$id <- seq_len(nrow(node_position_non_zero))
  node_position_non_zero_upper <- node_position_non_zero[which(node_position_non_zero[[dimension_name_2]] >= 0), ]
  node_position_non_zero_lower <- node_position_non_zero[which(node_position_non_zero[[dimension_name_2]] < 0), ]
  node_position_non_zero_upper <- node_position_non_zero_upper[order(node_position_non_zero_upper[[dimension_name_1]], decreasing = TRUE), ]
  node_position_non_zero_lower <- node_position_non_zero_lower[order(node_position_non_zero_lower[[dimension_name_1]], decreasing = FALSE), ]
  node_position_non_zero_sorted <- rbind(node_position_non_zero_upper, node_position_non_zero_lower)

  # find which has the max length
  max_i <- which(length_list[non_zero_lengths] == max(length_list))[1]
  first_i <- which(node_position_non_zero_sorted$id == max_i)[1]

  # find the coordinates of the fixed node
  x1 <- node_position_non_zero_sorted[[dimension_name_1]][first_i]
  y1 <- node_position_non_zero_sorted[[dimension_name_2]][first_i]

  # rotate the ordered nodes: the i-th node after the fixed one (wrapping
  # around the sorted list) is the fixed node rotated by (i - 1) steps
  for (i in seq_len(nrow(node_position_non_zero_sorted))) {
    ind <- (first_i + i - 1) %% nrow(node_position_non_zero_sorted)

    if (ind == 0) {
      ind <- nrow(node_position_non_zero_sorted)
    }

    angle <- (i - 1) * rotation_angle
    # standard 2-D rotation of (x1, y1) by `angle`
    x2 <- x1 * cos(angle) - y1 * sin(angle)
    y2 <- x1 * sin(angle) + y1 * cos(angle)
    node_position_non_zero_sorted[[dimension_name_1]][ind] <- x2
    node_position_non_zero_sorted[[dimension_name_2]][ind] <- y2
  }

  # match the order of the original data
  node_position_non_zero_sorted <- node_position_non_zero_sorted[order(node_position_non_zero_sorted$id, decreasing = FALSE), ]
  node_position[non_zero_lengths, ] <- node_position_non_zero_sorted[, ..dimension_names]
  set$rotation$nodes[, dimension_names] <- node_position

  # compute the node weights so that the centroids could be computed:
  # each connection contributes half of its count to both of its codes
  codeNames <- set$rotation$codes
  row_counts <- set$connection.counts
  node_weights <- data.frame(matrix(0, nrow = nrow(row_counts), ncol = length(codeNames)))

  # seq_len() keeps the loop empty when there are fewer than two codes
  for (i in seq_len(max(length(codeNames) - 1L, 0L))) {
    for (j in (i + 1):length(codeNames)) {
      connection_name <- paste0(codeNames[i], " & ", codeNames[j])
      x <- row_counts[, ..connection_name] / 2
      node_weights[, i] <- node_weights[, i] + x
      node_weights[, j] <- node_weights[, j] + x
    }
  }

  # normalise each row of weights to sum to 1 (all-zero rows stay zero)
  rs <- rowSums(node_weights)
  rs_1 <- which(rs != 0)
  node_weights[rs_1, ] <- node_weights[rs_1, ] / rs[rs_1]

  # finally, recompute centroids as weighted averages of the node positions
  centroids <- as.matrix(node_weights) %*% as.matrix(set$rotation$nodes)

  # NOTE(review): the `j + 1` offset assumes exactly one leading non-dimension
  # column in `set$model$centroids` -- confirm.
  for (j in seq_len(ncol(centroids))) {
    set$model$centroids[, j + 1] <- centroids[, j]
  }

  return(set)
}

#' @title with.ena.matrix
#' @description This function sets up a context using the provided data (typically an ENA matrix),
#' allowing the evaluation of an expression (`expr`) with access to both the matrix and
#' its metadata. Optionally, a custom matrix `V` and other arguments can be supplied.
#'
#' @param data An ENA matrix or data frame containing the data to be used.
#' @param expr An R expression to be evaluated within the context of the ENA matrix.
#' @param ... Additional arguments, including an optional custom matrix `V` and other parameters.
#'   Every argument passed here is also made available to `expr` by name.
#'
#' @details
#' - If a custom matrix `V` is provided in `...` (matched by its exact name), it
#'   will be used and a message is emitted; otherwise, `data` is converted with
#'   `as.matrix()`.
#' - Columns of `data` that are character vectors are recoded as the integer
#'   codes of their factor levels; other columns are passed through unchanged.
#' - The expression is evaluated with access to the matrix (`V`), the columns of
#'   `data` and the arguments in `...`; names not found there are looked up in
#'   the caller's frame.
#'
#' @return The result of evaluating `expr` in the constructed context.
#'
#' @export
with.ena.matrix <- function(data, expr, ...) {
  dot_args <- list(...)

  # Points: `[[` matches the name exactly, whereas `$` would partially match
  # and treat an argument such as `Vother` as the custom matrix.
  custom_v <- dot_args[["V"]]
  if (!is.null(custom_v)) {
    message("- using custom V matrix")
    V <- custom_v
  } else {
    V <- as.matrix(data)
  }

  # Meta data: expose each column, recoding character columns numerically
  columns <- lapply(unclass(data), function(i_val) {
    if (is.character(i_val)) {
      i_val <- as.numeric(as.factor(i_val))
    }
    i_val
  })
  columns$V <- V

  # substitute() must see the caller's unevaluated `expr`; parent.frame() is
  # the caller, so free variables in `expr` resolve there.
  eval(substitute(expr), c(columns, dot_args), enclos = parent.frame())
}
55
56		###
#' @title ENA Rotate by regression (second way)
#'
#' @description This function allows the user to provide a regression formula for rotation on x and optionally on y.
#' If a regression formula for y is not provided, svd is applied to the residual data deflated by x to get y coordinates.
#' The regression formula should use ENA points as major predictors and a binary or numerical variable as dependent variable.
#' Control and interaction variables are allowed to be included as predictors in the formula.
#'
#' @param enaset An \code{\link{ENAset}}
#' @param params list of parameters, may include:
#' x_var: Regression formula for x direction, such as "lm(formula= Condition ~ V + GameHalf + Condition : GameHalf)",
#' where V always stands for the ENA points.
#' y_var: Regression formula, similar to x_var for y direction (optional).
#'
#' @export
#' @return \code{\link{ENARotationSet}}
ena.rotate.by.hena.regression_2 <- function(enaset, params) {

  # check arguments
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  x <- formula(params$x_var)

  # points that get deflated below: prefer the normed/centered points if present
  if (is.null(enaset$points.normed.centered)) {
    p <- as.matrix(enaset$model$points.for.projection)
  } else {
    p <- as.matrix(enaset$points.normed.centered)
  }

  # number of ENA dimensions, i.e. number of `V` coefficients in the regression
  n <- ncol(as.matrix(p))

  # regress to get v1 using x regression formula.
  # NOTE(review): the regression always sees `V` built by with.ena.matrix()
  # from `enaset$model$points.for.projection`; the original assigned local
  # `V <- as.matrix(p)` (and later `V <- defA`) without passing it on, so
  # those values never reached lm(). Confirm whether `V = p` / `V = defA`
  # was intended before changing the numerical behaviour.
  v1_res <- with.ena.matrix(enaset$model$points.for.projection, {
    prm_var <- params$x_var
    prm <- if (is.character(prm_var)) {
      prm_var
    } else {
      enquote(prm_var)
    }
    vars <- all.vars(formula(prm))
    all_exists <- sapply(vars, function(x) x == "V" || exists(x))
    if (!all(all_exists)) {
      stop(paste0("The following columns in the formula are not found in the unique metadata for the units: ", paste0(vars[!all_exists], collapse = ", ")))
    }
    lm(formula(prm))
  })
  v1 <- v1_res$coefficients

  # remove intercept: keep the n coefficients that follow it (vector case) or
  # the second coefficient row (matrix case)
  if (is.null(dim(v1))) {
    v1 <- v1[2:(n + 1)]
  } else {
    v1 <- v1[2, ]
  }

  # make v1 a unit vector
  norm_v1 <- sqrt(sum(v1 * v1))
  if (norm_v1 != 0) {
    v1 <- v1 / norm_v1
  }

  # name v1 vector after the second variable of the formula, falling back to
  # the first coefficient name
  if (is.na(all.vars(x)[2])) {
    xName <- names(v1)[1]
  } else {
    xName <- all.vars(x)[2]
  }

  # Save v1
  R <- matrix(c(v1), ncol = 1)
  colnames(R) <- c(paste0(xName, "_reg"))

  # deflate matrix by x dimension (remove the component along v1)
  A <- as.matrix(p)
  defA <- as.matrix(A) - as.matrix(A) %*% v1 %*% t(v1)

  # if y formula is given, regress by y formula
  if (!is.null(params$y_var)) {
    y <- formula(params$y_var)

    # regress to get v2 vector using formula y
    v2_res <- with.ena.matrix(enaset$model$points.for.projection, {
      prm_var <- params$y_var
      prm <- if (is.character(prm_var)) {
        prm_var
      } else {
        enquote(prm_var)
      }
      vars <- all.vars(formula(prm))
      all_exists <- sapply(vars, function(x) x == "V" || exists(x))
      if (!all(all_exists)) {
        stop(paste0("The following columns in the formula are not found in the unique metadata for the units: ", paste0(vars[!all_exists], collapse = ", ")))
      }
      lm(formula(prm))
    })
    v2 <- v2_res$coefficients

    # Keep the same n coefficients as for v1. Slicing to length(v2) also kept
    # control-variable coefficients, which made v2 non-conformable with defA.
    v2 <- v2[2:(n + 1)]

    # make v2 a unit vector
    norm_v2 <- sqrt(sum(v2 * v2))

    if (norm_v2 != 0) {
      v2 <- v2 / norm_v2
    }

    # name v2 vector
    if (is.na(all.vars(y)[2])) {
      yName <- names(v2)[1]
    } else {
      yName <- all.vars(y)[2]
    }

    # save both v1 and v2
    R <- cbind(v1, v2)
    colnames(R) <- c(paste0(xName, "_reg"), paste0(yName, "_reg"))

    # deflate by v2
    defA <- as.matrix(defA) - as.matrix(defA) %*% v2 %*% t(v2)
  }

  # get svd for deflated points
  svd_result <- prcomp(defA, retx = FALSE, scale = FALSE, center = FALSE, tol = 0)
  svd_v <- svd_result$rotation

  # Merge rotation vectors: regression axes first, then as many SVD axes as
  # are needed to keep the original number of columns
  vcount <- ncol(R)
  colNamesR <- colnames(R)
  combined <- cbind(R, svd_v[, 1:(ncol(svd_v) - vcount)])
  colnames(combined) <- c(
    colNamesR,
    paste0("SVD", ((vcount + 1):ncol(combined)))
  )

  # create rotation set
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = NULL
  )

  return(rotation_set)
}
212

#' @title ENAplot Class
#'
#' @description
#' The ENAplot R6 class provides a structure for visualizing ENAset objects using plotly.
#' It encapsulates the ENAset data, the plotly visualization, and related plotting parameters.
#'
#' @section Fields:
#' \describe{
#'   \item{enaset}{The \code{\link{ENAset}} object from which the ENAplot was constructed.}
#'   \item{plot}{The plotly object used for data visualization.}
#'   \item{axes}{Axes information for the plot (TBD).}
#'   \item{point}{Point information for the plot (TBD).}
#'   \item{palette}{Color palette used for plotting (TBD).}
#'   \item{plotted}{Indicates whether the plot has been rendered (TBD).}
#' }
#'
#' @examples
#' # Example usage:
#' # enaplot <- ENAplot$new(enaset = myENAset)
#'
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export
#'
#' @field enaset - The \code{\link{ENAset}} object from which the ENAplot was constructed
#' @field plot - The plotly object used for data visualization
#' @field axes A list or object specifying the axes configuration for the ENA plot, such as axis labels, limits, or scaling.
#' @field point A structure representing the data points to be plotted, including coordinates and visual properties.
#' @field palette A set of colors or a function defining the color scheme used for plotting elements in the ENA plot.
#' @field plotted A logical or status indicator showing whether the plot has been rendered or updated.
#' @field showticklabels Logical. Indicates whether to show tick labels on the axes.
#' @field autosize Logical. Indicates whether the plot should automatically resize.
#' @field automargin Logical. Indicates whether the plot should automatically adjust margins.
#' @field axispadding Numeric. Padding factor for the axes.
ENAplot <- R6::R6Class("ENAplot",

  public = list(

    ## Public Functions ----
    #' Create ENAplot
    #'
    #' @param enaset An ENA set object containing the data to be plotted.
    #' @param title The title of the plot.
    #' @param dimension.labels Labels for the dimensions shown in the plot.
    #' @param font.size Numeric value specifying the font size for plot text.
    #' @param font.color Color value for the plot text.
    #' @param font.family Font family to use for plot text.
    #' @param scale.to Controls the axis ranges: a list with optional `x` / `y`
    #'   ranges, `"points"` to scale to the plotted points, a numeric whose last
    #'   element is used as the symmetric axis limit, or anything else (default
    #'   `"network"`) to scale to the node positions.
    #' @param ... Optional named settings read by the constructor:
    #'   `multiplier`, `point.size`, `showticklabels`, `axispadding`,
    #'   `autosize`, `automargin`, `displayModeBar` and `ticks` (a list with
    #'   `location`, `color` and `angle`).
    #'
    #' @return ENAplot
    initialize = function(
      enaset = NULL,

      title = "ENA Plot",

      dimension.labels = c("", ""),

      font.size = 14,
      font.color = "#000000",
      font.family = "Arial",
      scale.to = "network",
      ...
    ) {
      if (is(enaset, "ENAset")) {
        warning(paste0("Usage of ENAset objects will be deprecated ",
          "and potentially removed altogether in future versions."))

        enaset <- ena.set(enaset)
      }

      # optional settings supplied through `...` override the field defaults
      args <- list(...)
      if (!is.null(args$multiplier)) {
        private$multiplier <- args$multiplier
      }
      if (!is.null(args$point.size)) {
        self$point$size <- args$point.size
      }
      if (!is.null(args$showticklabels)) {
        self$showticklabels <- args$showticklabels
      }
      if (!is.null(args$axispadding)) {
        self$axispadding <- args$axispadding
      }
      if (!is.null(args$autosize)) {
        self$autosize <- args$autosize
      }
      if (!is.null(args$automargin)) {
        self$automargin <- args$automargin
      }

      # keep private copies of the tables so later changes to this object do
      # not modify the caller's set by reference
      self$enaset <- list(
        connection.counts = data.table::copy(enaset$connection.counts),
        meta.data = data.table::copy(enaset$meta.data),
        model = list(
          model.type = enaset$model$model.type,
          raw.input = data.table::copy(enaset$model$raw.input),
          row.connection.counts = data.table::copy(enaset$model$row.connection.counts),
          unit.labels = enaset$model$unit.labels,
          points.for.projection = data.table::copy(enaset$model$points.for.projection),
          centroids = data.table::copy(enaset$model$centroids),
          variance = enaset$model$variance
        ),
        points = data.table::copy(enaset$points),
        line.weights = data.table::copy(enaset$line.weights),
        rotation = list(
          adjacency.key = data.table::copy(enaset$rotation$adjacency.key),
          codes = enaset$rotation$codes,
          rotation.matrix = data.table::copy(enaset$rotation$rotation.matrix),
          center.vec = enaset$rotation$center.vec,
          nodes = data.table::copy(enaset$rotation$nodes)
        ),
        plots = list()
      )
      self$title <- title

      private$dimension.labels <- dimension.labels
      private$font.size <- font.size
      private$font.color <- font.color
      private$font.family <- font.family
      private$font <- list(
        size = private$font.size,
        color = private$font.color,
        family = private$font.family
      )
      self$plot <- plotly::plot_ly(
        mode = "markers",
        type = "scatter"
      )

      self$plot <- plotly::config(p = self$plot, displayModeBar = args$displayModeBar)

      # axis ranges
      if (is.list(scale.to)) {
        # per-axis ranges; a missing axis falls back to the padded point extent
        max.axis <- max(abs(as.matrix(enaset$points))) * self$axispadding
        if (is.null(scale.to$x)) {
          axis.range.x <- c(-max.axis, max.axis)
        } else {
          axis.range.x <- scale.to$x
        }
        if (is.null(scale.to$y)) {
          axis.range.y <- c(-max.axis, max.axis)
        } else {
          axis.range.y <- scale.to$y
        }
      } else {
        if (is.character(scale.to) && scale.to == "points") {
          max.axis <- max(abs(as.matrix(enaset$points))) * self$axispadding
        } else if (is.numeric(scale.to)) {
          max.axis <- tail(scale.to, 1)
        } else {
          max.axis <- max(abs(as.matrix(enaset$rotation$nodes))) * self$axispadding
        }
        axis.range.x <- axis.range.y <- c(-max.axis, max.axis)
      }

      # NOTE(review): `showgrid` is given twice with conflicting values. Both
      # entries are kept as in the original because which one takes effect
      # depends on how plotly merges and serialises the list -- decide on one.
      graph.axis <- list(
        titlefont = private$font,
        showgrid = FALSE,
        zeroline = TRUE,
        showticklabels = self$showticklabels,
        showgrid = TRUE
      )
      if (!is.null(args$ticks)) {
        graph.axis$showticklabels <- TRUE
        graph.axis$ticks <- args$ticks$location
        graph.axis$tickcolor <- args$ticks$color
        graph.axis$tickangle <- args$ticks$angle
      }
      self$axes$x <- graph.axis
      self$axes$x$title <- dimension.labels[1]
      self$axes$x$range <- axis.range.x
      self$axes$y <- graph.axis
      self$axes$y$title <- dimension.labels[2]
      self$axes$y$range <- axis.range.y

      self$plot <- plotly::layout(
        self$plot,
        title = title,
        xaxis = self$axes$x,
        yaxis = self$axes$y,
        autosize = self$autosize,
        font = list(
          size = 12,
          color = private$font.color,
          family = private$font.family
        )
      )
    },

    #' Print ENA plot
    #'
    #' @return Called for its side effect of printing `self$plot`.
    print = function() {
      print(self$plot)
    },

    #' Get property from object
    #'
    #' @param x character key to retrieve from the object's private members
    #' @return value from object at x
    get = function(x) {
      return(private[[x]])
    },

    ## Public Properties ----
    enaset = NULL,
    title = "ENA Plot",
    plot = NULL,
    axes = list(
      x = NULL, y = NULL
    ),
    point = list(
      size = 5
    ),
    showticklabels = FALSE,
    autosize = FALSE,
    automargin = TRUE,
    axispadding = 1.2,
    palette = c("#386CB0", "#F0027F", "#7FC97F", "#BEAED4",
                "#FDC086", "#FFFF99", "#BF5B17"),
    plotted = list(
      points = list(), networks = list(),
      trajectories = list(), means = list()
    )
  ),

  private = list(
    ####
    ## Private Properties
    ####
    dimension.labels = c("X", "Y"),

    font = list(),
    font.size = 14,
    font.color = "#000000",
    font.family = "Arial",
    #plot.color = I("black"),

    multiplier = 5
    ####
    ## END: Private Properties
    ####
  )
)

1		## ── qeviz interactive plot integration ───────────────────────────────────────
2		##
3		## Public API
4		## ena.plot.interactive() — create an interactive qeviz htmlwidget
5		## ena.export.html() — write a self-contained HTML file
6		## enaInteractiveOutput() — Shiny output binding
7		## renderEnaInteractive() — Shiny render function
8		##
9		## Internal helpers
10		## .ena_to_model_data() — convert ena.set to qeviz ModelData list
11		## .ena_frame() — build a QEFrame list from a data.frame
12		## .ena_group_ci() — 95% t-interval bounds per group
13		## .ena_group_outlier() — IQR-based outlier bounds per group
14		## ─────────────────────────────────────────────────────────────────────────────
15
16
17		# ── Internal helpers ──────────────────────────────────────────────────────────
18
#' Build a QEFrame list from a plain data.frame.
#'
#' Returns a list with two elements: `data`, one named list per row of `df`,
#' and `types`, a named list giving `"integer"`, `"numeric"` or `"character"`
#' for each column (anything that is not numeric is reported as
#' `"character"`).
#' @noRd
.ena_frame <- function(df) {
  # is.integer() must be tested first: is.numeric() is also TRUE for integer
  # vectors, which previously made the "integer" label unreachable.
  column_type <- function(col) {
    if (is.integer(col)) {
      "integer"
    } else if (is.numeric(col)) {
      "numeric"
    } else {
      "character"
    }
  }

  list(
    data = lapply(seq_len(nrow(df)), function(i) as.list(df[i, , drop = FALSE])),
    # vapply() guarantees one character label per column, even for zero columns
    types = as.list(setNames(vapply(df, column_type, character(1)), names(df)))
  )
}
34
#' Compute per-group CI bounding boxes (t-interval on the group mean).
#'
#' For every group with at least two rows, the bounds on each dimension are
#' `mean -/+ qt((1 + conf_level) / 2, n - 1) * sd / sqrt(n)`, where `n` is the
#' number of rows in the group. Groups with fewer than two rows are omitted.
#'
#' Returns a data.frame with columns: group, {dim}.low, {dim}.high for each dim
#' (bounds are numeric), or `NULL` when no group has at least two rows.
#' @noRd
.ena_group_ci <- function(points_df, group_col, dim_cols, conf_level = 0.95) {
  # column names interleaved per dimension: d1.low, d1.high, d2.low, ...
  bound_names <- as.vector(rbind(paste0(dim_cols, ".low"), paste0(dim_cols, ".high")))

  groups <- unique(points_df[[group_col]])
  rows <- lapply(groups, function(g) {
    sub <- points_df[points_df[[group_col]] == g, dim_cols, drop = FALSE]
    n <- nrow(sub)
    if (n < 2L) return(NULL)

    means <- colMeans(sub, na.rm = TRUE)
    sds <- apply(sub, 2, sd, na.rm = TRUE)
    half_width <- qt((1 + conf_level) / 2, df = n - 1L) * sds / sqrt(n)

    # rbind() + as.numeric() interleaves low/high to match `bound_names`
    bounds <- as.numeric(rbind(means - half_width, means + half_width))

    # Combine as lists: c(group = g, <numeric>) would coerce every bound to
    # character whenever the group label is a string.
    row <- c(list(group = g), as.list(setNames(bounds, bound_names)))
    as.data.frame(row, stringsAsFactors = FALSE)
  })

  rows <- Filter(Negate(is.null), rows)
  if (length(rows) == 0L) return(NULL)
  do.call(rbind, rows)
}
63
#' Compute per-group IQR-based outlier bounding boxes.
#'
#' For each group and each dimension the box spans
#' `Q1 - iqr_factor * IQR` to `Q3 + iqr_factor * IQR`.
#'
#' Returns a data.frame with columns: group, {dim}.low, {dim}.high for each dim,
#' or `NULL` when no group produced a row.
#' @noRd
.ena_group_outlier <- function(points_df, group_col, dim_cols, iqr_factor = 1.5) {
  box_for_group <- function(g) {
    members <- points_df[points_df[[group_col]] == g, dim_cols, drop = FALSE]
    if (nrow(members) < 1L) {
      return(NULL)
    }

    box <- list(group = g)
    for (d in dim_cols) {
      lower_q <- quantile(members[[d]], 0.25, na.rm = TRUE)
      upper_q <- quantile(members[[d]], 0.75, na.rm = TRUE)
      spread <- upper_q - lower_q
      box[[paste0(d, ".low")]] <- as.numeric(lower_q - iqr_factor * spread)
      box[[paste0(d, ".high")]] <- as.numeric(upper_q + iqr_factor * spread)
    }
    as.data.frame(box, stringsAsFactors = FALSE)
  }

  boxes <- lapply(unique(points_df[[group_col]]), box_for_group)
  boxes <- Filter(Negate(is.null), boxes)
  if (length(boxes) == 0L) {
    return(NULL)
  }
  do.call(rbind, boxes)
}
86
#' Convert an ena.set to the ModelData list expected by qeviz.
#'
#' Builds the `nodes`, `edges` and `points` frames from `set$rotation$nodes`,
#' `set$connection.counts` and `set$points`. When `group_col` names a column of
#' the points, a `groups` frame (per-group means, plus CI bounds when
#' `include_ci` is TRUE) and an `outlier` frame are added.
#' @noRd
.ena_to_model_data <- function(set,
                               group_col = NULL,
                               dim_cols = c("SVD1", "SVD2"),
                               include_ci = TRUE,
                               conf_level = 0.95,
                               iqr_factor = 1.5) {

  # -- nodes -------------------------------------------------------------------
  node_pos <- as.data.frame(set$rotation$nodes)[, c("code", dim_cols), drop = FALSE]
  nodes <- .ena_frame(node_pos)

  # -- edges -------------------------------------------------------------------
  # connection.counts has metadata columns (ena.metadata class) followed by
  # edge-weight columns (ena.co.occurrence class). Edge column names use the
  # rENA " & " separator; qeviz expects "." -- rename them here.
  cc <- as.data.frame(set$connection.counts)
  is_edge <- vapply(cc, function(x) inherits(x, "ena.co.occurrence"), logical(1))
  edge_cc <- cc[, is_edge, drop = FALSE]
  names(edge_cc) <- gsub(" & ", ".", names(edge_cc), fixed = TRUE)
  edge_cc$QEUNIT <- as.character(cc$ENA_UNIT)
  # move the unit id to the first column
  edge_cc <- edge_cc[, c("QEUNIT", setdiff(names(edge_cc), "QEUNIT")), drop = FALSE]
  edges <- .ena_frame(edge_cc)

  # -- points ------------------------------------------------------------------
  pts <- as.data.frame(set$points)
  keep_cols <- c("ENA_UNIT", group_col, dim_cols)
  pts <- pts[, keep_cols[keep_cols %in% names(pts)], drop = FALSE]
  names(pts)[names(pts) == "ENA_UNIT"] <- "QEUNIT"
  if (!is.null(group_col) && group_col %in% names(pts)) {
    pts[[group_col]] <- as.character(pts[[group_col]])
  }
  for (d in dim_cols) {
    if (d %in% names(pts)) pts[[d]] <- as.numeric(pts[[d]])
  }
  points <- .ena_frame(pts)

  result <- list(
    nodes = nodes,
    edges = edges,
    points = points,
    updated = as.numeric(Sys.time()) * 1000,
    id_col = "QEUNIT",
    node_id_col = "code",
    x_col = dim_cols[1L],
    y_col = dim_cols[2L],
    group_col = group_col
  )

  # -- groups frame: pre-computed means + optional CI bounds --------------------
  if (!is.null(group_col) && group_col %in% names(pts)) {
    groups_unique <- unique(pts[[group_col]])

    means_rows <- lapply(groups_unique, function(g) {
      sub <- pts[pts[[group_col]] == g, dim_cols, drop = FALSE]
      means <- colMeans(sub, na.rm = TRUE)
      # Combine as lists: c(group = g, <numeric>) would coerce the means to
      # character because the group label is a string.
      as.data.frame(
        c(list(group = g), as.list(setNames(as.numeric(means), dim_cols))),
        stringsAsFactors = FALSE
      )
    })
    groups_df <- do.call(rbind, Filter(Negate(is.null), means_rows))

    if (include_ci) {
      ci_df <- .ena_group_ci(pts, group_col, dim_cols, conf_level)
      if (!is.null(ci_df)) {
        groups_df <- merge(groups_df, ci_df, by = "group", all.x = TRUE)
        # merge() sorts by key; restore the order in which groups first appear
        groups_df <- groups_df[match(groups_unique, groups_df$group), , drop = FALSE]
      }
    }

    result$groups <- .ena_frame(groups_df)

    # Deprecated: outlier frame retained for backward compat
    out_df <- .ena_group_outlier(pts, group_col, dim_cols, iqr_factor)
    if (!is.null(out_df)) result$outlier <- .ena_frame(out_df)
  }

  result
}
168
169
170		# ── Public API ────────────────────────────────────────────────────────────────
171
#' Interactive ENA plot using qeviz
#'
#' Builds the qeviz model data for an ENA set and wraps it, together with the
#' display options, in an htmlwidget for RStudio, R Markdown / Quarto, and
#' Shiny.
#'
#' @param set An \code{\link{ena.make.set}} result.
#' @param group_col Character. Name of the grouping column in \code{set$points}
#'   (e.g. \code{"Condition"}). Controls point colours and group
#'   mean networks.
#' @param group Character. Which group's mean network to display. Defaults
#'   to the first group.
#' @param unit Character. A specific unit ID to display its individual
#'   network instead of a group mean.
#' @param compare Character. Second group or unit for a subtraction view
#'   (\code{group} minus \code{compare}).
#' @param also Character. Second group for an overlay view (both networks
#'   drawn simultaneously).
#' @param dim_cols Character vector of two dimension names to plot.
#'   Default \code{c("SVD1", "SVD2")}.
#' @param label_nodes \code{"on"} | \code{"off"} | \code{"auto"} | \code{"click"}.
#'   Visibility of code-node labels. Default \code{"on"}.
#' @param label_means Visibility of group-mean labels. Default \code{"on"}.
#' @param label_points Visibility of unit-point labels. Default \code{"off"}.
#' @param confidence Logical. Include 95\% CI bounds in the groups frame.
#'   Default \code{TRUE}.
#' @param outlier Logical. Draw IQR-based outlier boxes. Default \code{TRUE}.
#' @param scale_points Logical. Rescale unit points to match the node coordinate
#'   space. Default \code{TRUE}.
#' @param conf_level Numeric. Confidence level for CI boxes. Default \code{0.95}.
#' @param iqr_factor Numeric. IQR multiplier for outlier boxes. Default \code{1.5}.
#' @param width,height Widget dimensions in pixels. \code{NULL} uses htmlwidgets
#'   sizing policy defaults (700 x 650).
#'
#' @return An \code{htmlwidget} object that renders in RStudio Viewer, R Markdown,
#'   Quarto, and Shiny.
#'
#' @examples
#' \dontrun{
#' data(RS.data)
#' codeNames <- c("Data", "Technical.Constraints", "Performance.Parameters",
#'   "Client.and.Consultant.Requests", "Design.Reasoning", "Collaboration")
#' accum <- ena.accumulate.data(
#'   units = RS.data[, c("UserName", "Condition")],
#'   conversation = RS.data[, c("Condition", "GroupName")],
#'   codes = RS.data[, codeNames],
#'   window.size.back = 4
#' )
#' set <- ena.make.set(enadata = accum)
#'
#' # Basic plot coloured by Condition
#' ena.plot.interactive(set, group_col = "Condition")
#'
#' # Show only FirstGame mean network
#' ena.plot.interactive(set, group_col = "Condition", group = "FirstGame")
#'
#' # Subtraction: FirstGame minus SecondGame
#' ena.plot.interactive(set, group_col = "Condition",
#'   group = "FirstGame", compare = "SecondGame")
#' }
#'
#' @export
ena.plot.interactive <- function(
    set,
    group_col = NULL,
    group = NULL,
    unit = NULL,
    compare = NULL,
    also = NULL,
    dim_cols = c("SVD1", "SVD2"),
    label_nodes = "on",
    label_means = "on",
    label_points = "off",
    confidence = TRUE,
    outlier = TRUE,
    scale_points = TRUE,
    conf_level = 0.95,
    iqr_factor = 1.5,
    width = NULL,
    height = NULL
) {
  widgets_available <- requireNamespace("htmlwidgets", quietly = TRUE)
  if (!widgets_available) {
    stop("The 'htmlwidgets' package is required. Install with: install.packages('htmlwidgets')")
  }

  model_data <- .ena_to_model_data(
    set,
    group_col = group_col,
    dim_cols = dim_cols,
    include_ci = isTRUE(confidence),
    conf_level = conf_level,
    iqr_factor = iqr_factor
  )

  # The two flags are sent only when explicitly switched off. They are built
  # inside list() on purpose so that a NULL value keeps its named slot.
  viewer_options <- list(
    group = group,
    unit = unit,
    compare = compare,
    also = also,
    labelNodes = label_nodes,
    labelMeans = label_means,
    labelPoints = label_points,
    outlier = if (isFALSE(outlier)) "false" else NULL,
    scalePoints = if (isFALSE(scale_points)) "false" else NULL
  )

  sizing <- htmlwidgets::sizingPolicy(
    viewer.padding = 5,
    browser.fill = TRUE,
    knitr.figure = FALSE,
    knitr.defaultWidth = 700,
    knitr.defaultHeight = 650
  )

  htmlwidgets::createWidget(
    name = "qeviz",
    x = list(model = model_data, options = viewer_options),
    width = width,
    height = height,
    package = "rENA",
    sizingPolicy = sizing
  )
}
295
296
#' Export a self-contained interactive ENA plot as HTML
#'
#' Writes a single \code{.html} file containing the qeviz bundle and embedded
#' model data. No R, no Python, and no server are required to open the file:
#' share it with collaborators, attach it to a paper submission, or archive it
#' as supplementary material.
#'
#' @param set An \code{\link{ena.make.set}} result.
#' @param file Output file path. Default \code{"ena_plot.html"}.
#' @param group_col Character. Grouping column in \code{set$points}.
#' @param ... Additional arguments passed to \code{\link{ena.plot.interactive}}
#'   (e.g. \code{group}, \code{compare}, \code{label_nodes}).
#' @param width,height Plot dimensions in pixels. Default 700 x 600.
#' @param selfcontained Logical. Inline the qeviz bundle in the HTML file.
#'   Default \code{TRUE}. Set to \code{FALSE} to reference the
#'   bundle via a relative path (smaller file, not portable).
#'
#' @return The absolute path of the written file (invisibly).
#'
#' @examples
#' \dontrun{
#' set <- ena.make.set(enadata = accum)
#' ena.export.html(set, "model.html", group_col = "Condition")
#' }
#'
#' @export
ena.export.html <- function(
    set,
    file = "ena_plot.html",
    group_col = NULL,
    ...,
    width = 700L,
    height = 600L,
    selfcontained = TRUE
) {
  if (!requireNamespace("htmlwidgets", quietly = TRUE)) {
    stop("The 'htmlwidgets' package is required. Install with: install.packages('htmlwidgets')")
  }

  widget <- ena.plot.interactive(
    set,
    group_col = group_col,
    width = width,
    height = height,
    ...
  )

  # Resolve the directory rather than the file itself: normalizePath() cannot
  # resolve a file that does not exist yet, so normalising `file` directly
  # would leave a relative path on the first export.
  abs_file <- file.path(
    normalizePath(dirname(file), mustWork = FALSE),
    basename(file)
  )
  htmlwidgets::saveWidget(widget, abs_file, selfcontained = selfcontained)
  message("Written: ", abs_file)
  invisible(abs_file)
}
349
350
#' Shiny output binding for interactive ENA plots
#'
#' Creates the Shiny UI placeholder for the "qeviz" widget shipped with rENA.
#'
#' @param outputId Shiny output ID.
#' @param width,height CSS dimensions. Defaults: \code{"100\%"}, \code{"600px"}.
#' @export
enaInteractiveOutput <- function(outputId, width = "100%", height = "600px") {
  htmlwidgets::shinyWidgetOutput(
    outputId = outputId,
    name = "qeviz",
    width = width,
    height = height,
    package = "rENA"
  )
}
359
360
#' Shiny render function for interactive ENA plots
#'
#' Pairs with \code{\link{enaInteractiveOutput}}: the expression is handed to
#' \code{htmlwidgets::shinyRenderWidget()} in quoted form.
#'
#' @param expr Expression that returns an \code{\link{ena.plot.interactive}} widget.
#' @param env Environment for \code{expr}. Default: \code{parent.frame()}.
#' @param quoted Logical. Is \code{expr} already quoted? Default \code{FALSE}.
#' @return The value returned by \code{htmlwidgets::shinyRenderWidget()}.
#' @export
renderEnaInteractive <- function(expr, env = parent.frame(), quoted = FALSE) {
  # Capture the caller's expression unevaluated unless it was already quoted;
  # from here on it is always passed along as a quoted expression.
  if (!quoted) {
    expr <- substitute(expr)
  }
  htmlwidgets::shinyRenderWidget(
    expr = expr,
    outputFunction = enaInteractiveOutput,
    env = env,
    quoted = TRUE
  )
}

1		##
#' @title Generate ENA Set
#'
#' @description Generates an ENA model by constructing a dimensional reduction of adjacency (co-occurrence) vectors in an ENA data object
#'
#' @details This function generates an ENAset object from an ENAdata object. Takes
#' the adjacency (co-occurrence) vectors from enadata, computes a dimensional
#' reduction (projection), and calculates node positions in the projected ENA
#' space. Returns location of the units in the projected space, as well as
#' locations for node positions, and normalized adjacency (co-occurrence) vectors
#' to construct network graphs
#'
#' @export
#'
#' @param enadata \code{\link{ENAdata}} that will be used to generate an ENA model
#' @param dimensions The number of dimensions to include in the dimensional reduction
#' @param norm.by A function to be used to normalize adjacency (co-occurrence) vectors before computing the dimensional reduction, default: fun_sphere_norm
#' @param rotation.by A function to be used to compute the dimensional reduction, default: ena.svd()
#' @param rotation.params (optional) Additional parameters for the function in rotation.by, if needed; passed as its second argument (see the examples, where a list is supplied)
#' @param rotation.set A previously-constructed ENARotationSet object to use for the dimensional reduction
#' @param endpoints.only A logical variable which determines whether to only show endpoints for trajectory models
#' @param center.align.to.origin A logical variable; when TRUE (default) only units with at least one co-occurrence are used for (and affected by) centering
#' @param node.position.method A function to be used to determine node positions based on the dimensional reduction, default: lws.positions.sq. It must return a list with the keys "node.positions" and "centroids"
#' @param as.list R6 objects will be deprecated, but if this is FALSE, the original R6 object will be returned, otherwise (TRUE, the default) a list with class `ena.set`
#' @param ... additional parameters addressed in inner function
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#' 'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum
#' )
#'
#' set.means.rotated = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'     accum$meta.data$Condition=="FirstGame",
#'     accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' @seealso \code{\link{ena.accumulate.data}}, \code{\link{ENAset}}
#'
#' @return With \code{as.list = TRUE} the supplied \code{enadata} object with
#' line weights, rotation, points, node positions, centroids and variance
#' added; with \code{as.list = FALSE} the processed \code{\link{ENAset}}
#' object. Either can be further processed for analysis or plotting
##
ena.make.set <- function(
  enadata,
  dimensions = 2,
  norm.by = fun_sphere_norm,
  rotation.by = ena.svd,
  rotation.params = NULL,
  rotation.set = NULL,
  endpoints.only = TRUE,
  center.align.to.origin = TRUE,
  node.position.method = lws.positions.sq,
  as.list = TRUE,
  ...
) {
  ###
  # Legacy R6 path
  #####
  if (as.list == FALSE) {
    warning(paste0("Usage of ENAdata and ENAset objects will be deprecated ",
                   "and potentially removed altogether in future versions."))

    if (!inherits(enadata, "ENAdata")) {
      stop(paste0("Use of ena.make.set with as.list=FALSE requires `enadata` ",
                  "be an ENAdata object. Re-run the accumulation with as.list=FALSE"))
    }

    # Swap the list-based defaults for their R6 counterparts. Plain if/else is
    # used because ifelse() is meant for vectors and fails on NULL or on
    # functions that carry attributes.
    r6.rotation.by <- if (!is.null(rotation.by) && identical(rotation.by, ena.svd)) {
      ena.svd.R6
    } else {
      rotation.by
    }
    r6.position.method <- if (identical(node.position.method, lws.positions.sq)) {
      lws.positions.sq.R6
    } else {
      node.position.method
    }

    set <- ENAset$new(
      enadata = enadata,
      dimensions = dimensions,
      rotation.by = r6.rotation.by,
      rotation.params = rotation.params,
      rotation.set = rotation.set,
      norm.by = norm.by,
      node.position.method = r6.position.method,
      endpoints.only = endpoints.only,
      center.align.to.origin = center.align.to.origin,
      ...
    )
    return(set$process())
  }

  ###
  # List-based path
  #####
  if (inherits(enadata, "ENAdata")) {
    warning(paste0("Usage of ENAdata objects will be deprecated and ",
                   "potentially removed altogether in future versions. See ",
                   "ena.accumulate.data() or ena.set()."))

    enadata <- ena.set(enadata)
  }

  enadata$`_function.params`$center.align.to.origin <- center.align.to.origin
  enadata$`_function.params`$rotation.by <- rotation.by
  enadata$`_function.params`$rotation.params <- rotation.params

  ###
  # Convert the string vector of code names to their corresponding
  # co-occurrence names
  #####
  code_columns <- svector_to_ut(enadata$rotation$codes)

  ###
  # Normalize the raw data using the supplied `norm.by` function
  # (fun_sphere_norm unless the caller overrides it)
  #####
  line.weights <- norm.by(as.matrix(enadata$connection.counts))
  colnames(line.weights) <- code_columns

  line.weights.dt <- data.table::as.data.table(line.weights)
  for (i in seq_len(ncol(line.weights.dt))) {
    data.table::set(line.weights.dt, j = i,
                    value = as.ena.co.occurrence(line.weights.dt[[i]]))
  }

  enadata$line.weights <- cbind(enadata$meta.data, line.weights.dt)
  class(enadata$line.weights) <- c("ena.line.weights", "ena.matrix",
                                   class(enadata$line.weights))

  ###
  # Center the normed data
  # - With center.align.to.origin, rows without any co-occurrence are left
  #   untouched and excluded from the centering.
  #####
  if (!is.null(rotation.set) && !inherits(rotation.set, "ena.rotation.set")) {
    stop("Supplied rotation.set is not an instance of ENARotationSet")
  }

  # Computed once; drop = FALSE below keeps a matrix even when a single unit
  # has co-occurrences.
  non_zero_rows <- rowSums(as.matrix(line.weights)) != 0

  if (center.align.to.origin) {
    if (sum(non_zero_rows) > 0) {
      non_zero_weights <- line.weights[non_zero_rows, , drop = FALSE]
      points.for.projection <- line.weights
      points.for.projection[non_zero_rows, ] <- if (!is.null(rotation.set)) {
        center.projection(lws = non_zero_weights, rotation = rotation.set)
      } else {
        center_data_c(non_zero_weights)
      }
    } else {
      stop("There were no co-occurrences of codes for any of the units within the model as defined.")
    }
  } else if (!is.null(rotation.set)) {
    points.for.projection <- center.projection(lws = line.weights, rotation = rotation.set)
  } else {
    points.for.projection <- center_data_c(line.weights)
  }

  colnames(points.for.projection) <- code_columns
  projection.dt <- data.table::as.data.table(points.for.projection)
  for (i in seq_len(ncol(projection.dt))) {
    data.table::set(projection.dt, j = i,
                    value = as.ena.co.occurrence(projection.dt[[i]]))
  }
  enadata$model$points.for.projection <- as.ena.matrix(
    cbind(enadata$meta.data, projection.dt),
    "ena.points"
  )

  ###
  # Generate and Assign the rotation set
  #####
  # `rotation` stays NULL unless it is computed here; later steps test this
  # sentinel instead of exists(), which could pick up unrelated objects named
  # `rotation` from enclosing environments.
  rotation <- NULL
  if (!is.null(rotation.by) && is.null(rotation.set)) {
    rotation <- do.call(rotation.by, list(enadata, rotation.params))
    # added by Carl, 2026.1.6
    if (is.null(rotation)) {
      stop("Unable to create a rotation set")
    }

    rotation.dt <- data.table::as.data.table(rotation$rotation, keep.rownames = "codes")
    for (i in seq_len(ncol(rotation.dt))) {
      # The first column holds the code names; the rest are dimensions.
      wrapped <- if (i == 1) {
        as.ena.metadata(rotation.dt[[i]])
      } else {
        as.ena.dimension(rotation.dt[[i]])
      }
      data.table::set(rotation.dt, j = i, value = wrapped)
    }
    class(rotation.dt) <- c("ena.rotation.matrix", class(rotation.dt))

    enadata$rotation.matrix <- rotation.dt
    enadata$rotation$rotation.matrix <- rotation.dt
    enadata$rotation$eigenvalues <- rotation$eigenvalues
    # Centering vector: mean of the rows that took part in the centering
    enadata$rotation$center.vec <- if (center.align.to.origin) {
      colMeans(line.weights[non_zero_rows, , drop = FALSE])
    } else {
      colMeans(line.weights)
    }
  } else if (!is.null(rotation.set)) {
    # The class of rotation.set was validated before centering.
    enadata$rotation.matrix <- rotation.set$rotation.matrix
    enadata$rotation$rotation.matrix <- rotation.set$rotation.matrix
    enadata$rotation$nodes <- rotation.set$nodes
    enadata$rotation$eigenvalues <- rotation.set$eigenvalues
    enadata$rotation$center.vec <- rotation.set$center.vec
  } else {
    stop("Unable to find or create a rotation set")
  }

  ###
  # Generate the rotated points
  #####
  if (is.null(enadata$rotation.matrix)) {
    stop(paste0("There is no rotation matrix, if you supplied a custom ",
                "rotation.set, be sure it contains a rotation.matrix"))
  }
  points <- points.for.projection %*% as.matrix(enadata$rotation.matrix)
  points.dt <- data.table::as.data.table(points)
  for (i in seq_len(ncol(points.dt))) {
    data.table::set(points.dt, j = i, value = as.ena.dimension(points.dt[[i]]))
  }
  # Trajectory models pair the points with the trajectory rows instead of the
  # unit meta data.
  point.meta <- if (grepl(x = enadata$model$model.type, pattern = "Trajectory")) {
    enadata$trajectories
  } else {
    enadata$meta.data
  }
  enadata$points <- as.ena.matrix(cbind(point.meta, points.dt), "ena.points")

  ###
  # Calculate node positions
  # - The supplied method is expected to return a list
  #   with two keys, "node.positions" and "centroids"
  #####
  if (!is.null(rotation)) {
    positions <- node.position.method(enadata)

    expected.keys <- c("node.positions", "centroids")
    # Both keys must be present and no others; the presence check also rejects
    # unnamed or partial results that would only fail later and less clearly.
    if (!all(expected.keys %in% names(positions)) ||
        !all(names(positions) %in% expected.keys)) {
      stop(paste0("The node position method didn't return back the ",
                  "expected objects:\n",
                  "\tExpected: c('node.positions','centroids')\n",
                  "\tReceived: ", paste(names(positions), collapse = ", ")))
    }

    nodes.dt <- data.table::as.data.table(positions$node.positions)
    colnames(nodes.dt) <- colnames(points)
    rownames(nodes.dt) <- enadata$rotation$codes
    for (i in seq_len(ncol(nodes.dt))) {
      data.table::set(nodes.dt, j = i, value = as.ena.dimension(nodes.dt[[i]]))
    }
    nodes.dt <- data.table::data.table(
      code = structure(enadata$rotation$codes,
                       class = c("code", class(enadata$rotation$codes))),
      nodes.dt
    )
    class(nodes.dt) <- c("ena.nodes", class(nodes.dt))
    enadata$rotation$nodes <- nodes.dt

    centroids.dt <- data.table::as.data.table(positions$centroids)
    for (i in seq_len(ncol(centroids.dt))) {
      data.table::set(centroids.dt, j = i,
                      value = as.ena.dimension(centroids.dt[[i]]))
    }
    colnames(centroids.dt) <- colnames(as.matrix(enadata$rotation.matrix))
    centroids.dt <- cbind(
      data.table::data.table(unit = enadata$model$unit.labels),
      centroids.dt
    )
    data.table::set(centroids.dt, j = 1L,
                    value = as.ena.metadata(centroids.dt[[1L]]))
    enadata$model$centroids <- as.ena.matrix(centroids.dt)
  } else if (!is.null(rotation.set)) {
    enadata$rotation$nodes <- rotation.set$nodes
  }

  if (is.null(enadata$rotation$nodes)) {
    stop("Unable to determine the node positions either by calculating
          them using `node.position.method` or using a supplied
          `rotation.set`")
  }

  ###
  # Variance: share of the total variance carried by each dimension
  #####
  var_rot_data <- var(points)
  diagonal_variance <- as.vector(diag(var_rot_data))
  enadata$model$variance <- diagonal_variance / sum(diagonal_variance)
  names(enadata$model$variance) <- colnames(enadata$rotation$rotation.matrix)[-1]

  enadata$plots <- list() #default = ena.plot(enadata, ...))

  enadata$`_function.params`$norm.by <- norm.by
  return(enadata)
}

1		#####
#' @title Plot of ENA trajectories
#'
#' @description Function used to plot trajectories
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param points dataframe of matrix - first two column are X and Y coordinates, each row is a point in a trajectory
#' @param by vector used to subset points into individual trajectories, length nrow(points). When NULL (default) all points form a single trajectory
#' @param names character vector - labels for each trajectory of points, length length(unique(by))
#' @param labels character vector - point labels, length nrow(points)
#' @param labels.show A character choice: Always, Hover, Both. Default: Always
# @param confidence.interval A character that determines which confidence interval type to use, choices: none, box, crosshair, default: none
# @param outlier.interval A character that determines which outlier interval type to use, choices: none, box, crosshair, default: none
# @param confidence.interval.values A matrix/dataframe where columns are CI x and y values for each point
# @param outlier.interval.values A matrix/dataframe where columns are OI x and y values for each point
#' @param colors A character vector, that determines marker color, default NULL results in
#' alternating random colors. If single color is supplied, it will be used for all
#' trajectories, otherwise the length of the supplied color vector should be equal
#' to the length of the supplied names (i.e a color for each trajectory being plotted)
#' @param shape A character which determines the shape of markers, choices: circle, square, triangle-up, diamond, default: circle
#' @param label.offset A character vector of plotly text positions (e.g. "top left", "middle right"), either a single value used for every trajectory or one value per trajectory
#' @param label.font.size An integer which determines the font size for labels, default: enaplot$font.size
#' @param label.font.color A character which determines the color of label font, default: enaplot$font.color
#' @param label.font.family A character which determines font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
#' @param default.hidden A logical indicating if the trajectories should start hidden (click on the legend to show them) Default: FALSE
#'
#' @seealso \code{\link{ena.plot}}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#' 'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("GroupName","ActivityNumber")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post","C.Change")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4,
#'   model = "A"
#' );
#'
#' set = ena.make.set(accum);
#'
#' ### get mean network plots
#' first.game.lineweights = as.matrix(set$line.weights$Condition$FirstGame)
#' first.game.mean = colMeans(first.game.lineweights)
#'
#' second.game.lineweights = as.matrix(set$line.weights$Condition$SecondGame)
#' second.game.mean = colMeans(second.game.lineweights)
#'
#' subtracted.network = first.game.mean - second.game.mean
#'
#' # Plot dimension 1 against ActivityNumber metadata
#' dim.by.activity = cbind(
#'   as.matrix(set$points)[,1],
#'   set$trajectories$ActivityNumber * .8/14-.4 #scale down to dimension 1
#' )
#'
#' plot = ena.plot(set)
#' plot = ena.plot.network(plot, network = subtracted.network, legend.name="Network")
#' plot = ena.plot.trajectory(
#'   plot,
#'   points = dim.by.activity,
#'   names = unique(set$model$unit.label),
#'   by = set$trajectories$ENA_UNIT
#' );
#' print(plot)
#'
#' @return The \code{\link{ENAplot}} provided to the function, with its plot updated to include the trajectories
#####
ena.plot.trajectory <- function(
  enaplot,
  points,
  by = NULL,
  labels = NULL, #unique(enaplot$enaset$enadata$units),
  labels.show = c("Always", "Hover", "Both"),
  names = NULL,
  label.offset = NULL,
  label.font.size = enaplot$get("font.size"),
  label.font.color = enaplot$get("font.color"),
  label.font.family = c("Arial", "Courier New", "Times New Roman"),
  shape = c("circle", "square", "triangle-up", "diamond"),
  colors = NULL,
  default.hidden = FALSE
) {
  # Fall back to the plot-wide font family when none was supplied (the
  # signature default only enumerates the suggested choices) or when the
  # supplied value is not a string. The fallback previously overwrote
  # `label.font.size` by mistake.
  if (missing(label.font.family) || !is.character(label.font.family)) {
    label.font.family <- enaplot$get("font.family")
  }
  labels.show <- match.arg(labels.show)
  shape <- match.arg(shape)

  # Without a grouping, every point belongs to one trajectory.
  if (is.null(by)) {
    by <- list(all = rep(TRUE, nrow(points)))
  }
  if (!inherits(points, "data.table")) {
    points <- data.table::as.data.table(points)
  }

  # Text is only added to the trace mode / hover info when labels exist and
  # the requested display mode asks for it, so "Hover" no longer draws the
  # labels permanently.
  mode <- "lines+markers"
  hoverinfo <- "x+y"
  if (!is.null(labels)) {
    if (labels.show %in% c("Always", "Both")) {
      mode <- paste0(mode, "+text")
    }
    if (labels.show %in% c("Hover", "Both")) {
      hoverinfo <- paste0(hoverinfo, "+text")
    }
    tbl <- data.table::data.table(points, labels = labels)
  } else {
    tbl <- data.table::data.table(points)
  }

  # One row per trajectory; the `lines` list column holds that trajectory's
  # points (`by` is never NULL here because of the default above).
  if (is.character(by) && length(by) == nrow(tbl)) {
    by <- as.factor(by)
  }
  dfdt_trajs <- tbl[, {
    data.table::data.table(lines = list(.SD))
  }, by = by]
  n_trajs <- nrow(dfdt_trajs)

  valid_label_offsets <- c(
    "top left", "top center", "top right",
    "middle left", "middle center", "middle right",
    "bottom left", "bottom center", "bottom right"
  )
  if (!all(label.offset %in% valid_label_offsets)) {
    stop(sprintf(
      "Unrecognized label.offsets: %s",
      paste(unique(label.offset[!(label.offset %in% valid_label_offsets)]),
            collapse = ", ")
    ))
  }
  if (length(label.offset) == 1) {
    label.offset <- rep(label.offset, n_trajs)
  }

  if (!is.null(colors) &&
      length(colors) > 1 && length(colors) != length(names)) {
    stop("Length of the colors must be 1 or the same length as by")
  }
  # A single colour applies to every trajectory, also when `names` is NULL.
  if (length(colors) == 1) {
    colors <- rep(colors, n_trajs)
  }

  for (x in seq_len(n_trajs)) {
    traj <- dfdt_trajs[x, ]$lines[[1]]
    d <- remove_meta_data(traj)
    d.names <- colnames(d)
    trace_color <- if (!is.null(colors)) colors[x] else NULL

    enaplot$plot <- plotly::add_trace(
      enaplot$plot,
      data = d,
      x = as.formula(paste0("~", d.names[1])),
      y = as.formula(paste0("~", d.names[2])),
      name = names[x],
      mode = mode,
      text = traj$labels,
      textposition = label.offset[x],
      hoverinfo = hoverinfo,
      showlegend = TRUE,
      line = list(
        color = trace_color
      ),
      marker = list(
        symbol = shape,
        color = trace_color
      ),
      textfont = list(
        family = label.font.family,
        size = label.font.size,
        color = label.font.color
      ),
      visible = if (default.hidden) "legendonly" else TRUE
    )
  }

  # Remember what was drawn on the plot object.
  enaplot$plotted$trajectories[[
    length(enaplot$plotted$trajectories) + 1
  ]] <- dfdt_trajs

  return(enaplot)
}

#' @title hENA rotation for ENA
#'
#' @description hENA rotation function. Regresses every centered
#' co-occurrence column on \code{x_var} (and optionally \code{y_var}), uses
#' the normalized slope vectors as the leading rotation axes, deflates the
#' data by those axes and fills the remaining axes from a principal component
#' decomposition of the deflated data.
#'
#' @param enaset ena set
#' @param params list of parameters. Recognized entries: \code{x_var}
#' (required, column name), \code{y_var} (optional column name),
#' \code{control_vars} (optional additional regression terms),
#' \code{centering} (center \code{x_var}/\code{y_var}, default TRUE),
#' \code{include_xy} (add the x*y interaction term, default FALSE; requires
#' \code{y_var}) and \code{formula} (optional right-hand side that replaces
#' the generated one).
#'
#' @return An \code{ENARotationSet} holding the combined rotation matrix, the
#' codes of \code{enaset} and the eigenvalues of the deflated data
#' @export
ena.rotation.h <- function(
  enaset,
  params
) {
  # check arguments
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }
  x_var <- params$x_var
  y_var <- params$y_var
  control_vars <- params$control_vars
  centering <- if (!is.null(params$centering)) params$centering else TRUE
  include_xy <- if (!is.null(params$include_xy)) params$include_xy else FALSE
  formula <- params$formula

  # The interaction term multiplies x by y, so it cannot exist without y.
  if (include_xy && is.null(y_var)) {
    stop("`include_xy` requires `y_var` to be provided in params")
  }

  # get centered data
  if (!is.null(enaset$model$points.for.projection)) {
    data <- data.table::copy(enaset$model$points.for.projection)
  } else {
    data <- data.table::copy(enaset$points.normed.centered)
  }

  # Prep: mean-center every co-occurrence column
  value_vars <- colnames(as.matrix(data))
  data.table::set(
    x = data, j = value_vars,
    value = data[, lapply(.SD, function(x) x - mean(x)), .SDcols = value_vars]
  )

  # dummy code x_var
  # NOTE(review): rleidv() numbers consecutive runs, not distinct values, so
  # this yields a 0/1 coding only when the rows are ordered by the variable
  # and it has two levels -- confirm that callers guarantee this.
  if (!is.numeric(data[[x_var]])) {
    x_var_f <- paste0(x_var, "_f")
    data[[x_var_f]] <- data.table::rleidv(x = data, cols = x_var) - 1
    x_var <- x_var_f
  }

  # dummy code y_var (same caveat as for x_var)
  if (!is.null(y_var) && !is.numeric(data[[y_var]])) {
    y_var_f <- paste0(y_var, "_f")
    data[[y_var_f]] <- data.table::rleidv(x = data, cols = y_var) - 1
    y_var <- y_var_f
  }
  both_vars <- c(x_var, y_var)

  # centering x_var and y_var
  if (centering) {
    data[, c(both_vars) := lapply(.SD, function(x) x - mean(x)), .SDcols = c(both_vars)]
  }

  # prepare regression formula
  f <- paste(c(both_vars, control_vars), collapse = " + ")

  if (include_xy) {
    xy_var <- paste(both_vars, collapse = "_")
    data[[xy_var]] <- data[[x_var]] * data[[y_var]]
    f <- paste(c(f, xy_var), collapse = " + ")
  }

  if (!is.null(formula)) {
    f <- formula
  }

  # run regression models and get slope variables: one row per co-occurrence
  # column, one column per entry of both_vars (the intercept is skipped)
  slopes <- vapply(value_vars, function(value_var) {
    model_formula <- stats::as.formula(paste0("data$`", value_var, "` ~ ", f))
    stats::lm(model_formula, data = data)$coefficients[seq_along(both_vars) + 1]
  }, numeric(length(both_vars)))
  v <- matrix(slopes, ncol = length(both_vars), byrow = TRUE)

  # Prep deflation
  R <- NULL
  R_names <- character(0)
  A <- as.matrix(data[, value_vars, with = FALSE])

  # Normalize x rotation vector
  v1 <- v[, 1, drop = FALSE]
  norm_v1 <- sqrt(sum(v1 * v1))
  if (norm_v1 != 0) {
    v1 <- v1 / norm_v1
    R <- v1
    R_names <- paste("x", x_var, sep = "_")
  }
  # Remove the component along v1 (a no-op when v1 is the zero vector)
  defA <- as.matrix(A) - as.matrix(A) %*% v1 %*% t(v1)

  # Normalize y rotation vector, if applicable
  if (!is.null(y_var)) {
    v2 <- v[, 2]
    # Orthogonalize against v1 before normalizing
    v2 <- as.numeric(v2) - as.numeric(t(v2) %*% v1) * v1
    norm_v2 <- sqrt(sum(v2 * v2))

    if (norm_v2 != 0) {
      v2 <- v2 / norm_v2
      if (is.null(R)) {
        R <- matrix(c(v2), ncol = 1)
      } else {
        R <- matrix(c(R, v2), ncol = 2)
      }
      R_names <- c(R_names, paste("y", y_var, sep = "_"))
    }

    defA <- defA - defA %*% v2 %*% t(v2)
  }

  # get svd for deflated points
  svd_result <- stats::prcomp(defA, retx = FALSE, scale. = FALSE, center = FALSE, tol = 0)
  svd_v <- svd_result$rotation

  # Merge rotation vectors: the regression axes come first, the remaining
  # columns are filled from the decomposition of the deflated data. R is NULL
  # when no regression axis survived, which counts as zero fixed axes.
  vcount <- if (is.null(R)) 0L else ncol(R)
  n_svd <- max(ncol(svd_v) - vcount, 0L)
  combined <- cbind(R, svd_v[, seq_len(n_svd), drop = FALSE])

  colnames(combined) <- c(
    R_names,
    if (n_svd > 0) paste0("SVD", vcount + seq_len(n_svd))
  )

  # put into ENARotationSet
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = svd_result$sdev ^ 2
  )

  # Done
  return(rotation_set)
}

# Adds the node markers (and their text labels) as one scatter trace to
# `enaplot$plot` and returns the updated plotly object.
# All inputs (`enaplot`, `nodes`, `mode`, `labels`, `rows.to.keep`,
# `label.offset`, the `label.font.*` values, `legend.name`, `show.legend`)
# are free variables that must be resolvable from the function's environment.
plot_nodes <- function(...) {
  # Black markers without outline, sized by the absolute node weight
  node_marker <- list(
    color = "#000000",
    size = abs(nodes$weight),
    line = list(width = 0)
  )
  node_font <- list(
    family = label.font.family,
    size = label.font.size,
    color = label.font.color
  )

  enaplot$plot <- plotly::add_trace(
    enaplot$plot,
    type = "scatter",
    data = nodes,
    x = ~X1,
    y = ~X2,
    mode = mode,
    textposition = label.offset[rows.to.keep],
    marker = node_marker,
    textfont = node_font,
    text = labels[rows.to.keep],
    legendgroup = legend.name,
    name = legend.name,
    showlegend = show.legend,
    hoverinfo = 'none'
  )

  enaplot$plot
}
32
# Adds one "lines" trace per entry of `network.edges.shapes` to `enaplot$plot`,
# records the drawn shapes in `enaplot$plotted$networks`, and returns the
# updated plotly object.
# `enaplot`, `network.edges.shapes`, `legend.include.edges` and `legend.name`
# are free variables that must be resolvable from the function's environment.
plot_edges <- function(...) {
  if (length(network.edges.shapes) > 0) {
    next_slot <- length(enaplot$plotted$networks) + 1
    enaplot$plotted$networks[[next_slot]] <- network.edges.shapes

    for (idx in seq_along(network.edges.shapes)) {
      edge <- network.edges.shapes[[idx]]
      edge_label <- paste(edge$nodes[1], edge$nodes[2], sep = ".")

      # Edges only get their own legend entry when explicitly requested
      if (legend.include.edges) {
        trace_name <- edge_label
        in_legend <- TRUE
      } else {
        trace_name <- NULL
        in_legend <- FALSE
      }

      # Two-point data set describing the segment from (x0, y0) to (x1, y1)
      segment <- data.frame(
        X1 = c(edge$x0, edge$x1),
        X2 = c(edge$y0, edge$y1)
      )

      enaplot$plot <- plotly::add_trace(
        enaplot$plot,
        type = "scatter",
        mode = "lines",
        data = segment,
        x = ~X1, y = ~X2,
        line = edge$line,
        opacity = edge$opacity,
        legendgroup = if (legend.include.edges == TRUE) edge_label else legend.name,
        showlegend = in_legend,
        name = trace_name
      )
    }
  }

  enaplot$plot
}
65
66		##
67		#' @title Plot an ENA network
68		#'
69		#' @description Plot an ENA network: nodes and edges
70		#'
71		#' @details Plots a network graph, including nodes (taken from codes in the ENAplot) and the edges (provided in network)
72		#'
73		#' @export
74		#'
75		#' @param enaplot \code{\link{ENAplot}} object to use for plotting
76		#' @param network dataframe or matrix containing the edge weights for the network graph; typically comes from ENAset$line.weights
77		#' @param node.positions matrix containing the positions of the nodes. Defaults to enaplot$enaset$rotation$nodes
78		#' @param adjacency.key matrix containing the adjacency key for looking up the names and positions
79		#' @param colors A String or vector of colors for positive and negative line weights. E.g. red or c(pos= red, neg = blue), default: c(pos= red, neg = blue)
80		#' @param edge_type A String representing the type of line to draw, either "line", "dash", or "dot"
81		#' @param show.all.nodes A Logical variable, default: true
82		#' @param threshold A vector of numeric min/max values, default: c(0,Inf) plotting . Edge weights below the min value will not be displayed; edge weights above the max value will be shown at the max value.
83		#' @param thin.lines.in.front A logical, default: true
84		#' @param layers ordering of layers, default: c("nodes", "edges")
85		#' @param thickness A vector of numeric min/max values for thickness, default: c(min(abs(network)), max(abs(network)))
86		#' @param opacity A vector of numeric min/max values for opacity, default: thickness
87		#' @param saturation A vector of numeric min/max values for saturation, default: thickness
88		#' @param scale.range A vector of numeric min/max to scale from, default: c(0.1,1) or if min(network) is 0, c(0,1)
89		#' @param node.size A lower and upper bound used for scaling the size of the nodes, default c(0, 20)
90		#' @param labels A character vector of node labels, default: code names
91		#' @param label.offset A character vector of representing the positional offset relative to the respective node. Defaults to "middle right" for all nodes. If a single values is provided, it is used for all positions, else the length of the
92		#' @param label.font.size An integer which determines the font size for graph labels, default: enaplot$font.size
93		#' @param label.font.color A character which determines the color of label font, default: enaplot$font.color
94		#' @param label.font.family A character which determines font type, choices: Arial, Courier New, Times New Roman, default: enaplot$font.family
95		#' @param legend.name A character name used in the plot legend. Not included in legend when NULL (Default), if legend.include.edges is TRUE will always be "Nodes"
96		#' @param legend.include.edges Logical value indicating if the edge names should be included in the plot legend. Forces legend.name to be "Nodes"
97		#' @param scale.weights Logical indicating to scale the supplied network
98		#' @param ... Additional parameters
99		#'
100		#' @seealso \code{\link{ena.plot}}, \code{\link{ena.plot.points}}
101		#' @importFrom scales rescale
102
103		#' @examples
104		#' data(RS.data)
105		#'
106		#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
107		#' 'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
108		#'
109		#' accum = ena.accumulate.data(
110		#' units = RS.data[,c("UserName","Condition")],
111		#' conversation = RS.data[,c("Condition","GroupName")],
112		#' metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
113		#' codes = RS.data[,codeNames],
114		#' window.size.back = 4
115		#' )
116		#'
117		#' set = ena.make.set(
118		#' enadata = accum,
119		#' rotation.by = ena.rotate.by.mean,
120		#' rotation.params = list(
121		#' accum$meta.data$Condition=="FirstGame",
122		#' accum$meta.data$Condition=="SecondGame"
123		#' )
124		#' )
125		#'
126		#' plot = ena.plot(set)
127		#'
128		#' ### Subset rotated points and plot Condition 1 Group Mean
129		#' as.matrix(set$points$Condition$FirstGame)
130		#'
131		#' first.game.points = as.matrix(set$points$Condition$FirstGame)
132		#' plot = ena.plot.group(plot, first.game.points, labels = "FirstGame",
133		#' colors = "red", confidence.interval = "box")
134		#'
135		#' ### Subset rotated points and plot Condition 2 Group Mean
136		#' second.game.points = as.matrix(set$points$Condition$SecondGame)
137		#' plot = ena.plot.group(plot, second.game.points, labels = "SecondGame",
138		#' colors = "blue", confidence.interval = "box")
139		#'
140		#' ### get mean network plots
141		#' first.game.lineweights = as.matrix(set$line.weights$Condition$FirstGame)
142		#' first.game.mean = colMeans(first.game.lineweights)
143		#'
144		#' second.game.lineweights = as.matrix(set$line.weights$Condition$SecondGame)
145		#' second.game.mean = colMeans(second.game.lineweights)
146		#'
147		#' subtracted.network = first.game.mean - second.game.mean
148		#' plot = ena.plot.network(plot, network = subtracted.network)
149		#' print(plot)
150		#'
151		#' @return The \code{\link{ENAplot}} provided to the function, with its plot updated to include the nodes and provided connecting lines.
152		##
ena.plot.network = function(
  enaplot = NULL,
  network = NULL,
  node.positions = enaplot$enaset$rotation$nodes,
  adjacency.key = NULL, #enaplot$enaset$enadata$adjacency.matrix,
  colors = c(pos=enaplot$palette[1], enaplot$palette[2]),
  edge_type = "line", #c("line", "dash", "dot"),
  show.all.nodes = T,
  threshold = c(0),
  thin.lines.in.front = T,
  layers = c("nodes", "edges"),

  thickness = c(min(abs(network)), max(abs(network))),
  opacity = thickness,
  saturation = thickness,
  scale.range = c(ifelse(min(network)==0, 0, 0.1), 1),

  node.size = c(3,10),

  labels = NULL,
  label.offset = "middle right",
  label.font.size = enaplot$get("font.size"),
  label.font.color = enaplot$get("font.color"),
  label.font.family = enaplot$get("font.family"),
  legend.name = NULL,
  legend.include.edges = F,
  scale.weights = F,
  ...
) {
  # NOTE: plot_nodes() and plot_edges() are re-parented onto this function's
  # frame near the end of the body, so the local variable names used below are
  # part of their contract. Do not rename locals without checking those helpers.

  expected_codes <- choose(nrow(node.positions), 2)

  # A data.frame's length() is its number of columns, so a data.frame with the
  # right number of columns passes a plain length check and would otherwise
  # continue as a data.frame. Flatten it to a vector before validating.
  if (is.data.frame(network)) {
    network = as.vector(as.matrix(network))
  }
  if (expected_codes != length(network)) {
    stop(paste0("Network vector needs to be of length ", expected_codes))
  }

  # Resolve node identifiers, the adjacency key and default labels, either from
  # an ena.nodes table (its `code` column) or from the row names of a
  # matrix/data.frame of positions.
  node.rows <- NULL
  if (is(node.positions, "ena.nodes")) {
    if (is.null(adjacency.key)) {
      adjacency.key <- namesToAdjacencyKey(node.positions$code)
    }
    node.rows <- node.positions$code

    if (is.null(labels)) {
      labels <- node.positions$code
    }
  } else {
    if (is.matrix(node.positions)) {
      node.positions <- as.data.frame(node.positions)
    }
    adjacency.key <- namesToAdjacencyKey(rownames(node.positions))
    node.rows <- rownames(node.positions)
    if (is.null(labels)) {
      labels <- rownames(node.positions)
    }
  }
  args = list(...)
  network.edges.shapes = list()
  edge_type = match.arg(arg = edge_type, choices = c("line", "dash", "dot"))

  nodes = data.frame(as.matrix(node.positions))
  colnames(nodes) = paste0("X", seq(colnames(nodes)))
  nodes$weight = rep(0, nrow(nodes))
  nodes$color = "black"

  # Handle label parameters
  if (length(label.offset) == 1) {
    label.offset = rep(label.offset[1], length(labels))
  }
  if (length(label.offset) != length(labels)) {
    stop("length(label.offset) must be equal to 1 or length(labels)")
  }

  # Handle legend parameters
  if (legend.include.edges == T && !is.null(legend.name)) {
    legend.name = "Nodes"
  }

  # Apply the (min, max) threshold: weights below the minimum are zeroed and
  # weights above the maximum are clamped to it, keeping their sign.
  network.scaled = network
  if (!is.null(threshold)) {
    multiplier.mask = ((network.scaled >= 0) * 1) - ((network.scaled < 0) * 1)
    if (length(threshold) == 1) {
      threshold[2] = Inf
    } else if (threshold[2] < threshold[1]) {
      stop("Minimum threshold value must be less than the maximum value.")
    }

    if (threshold[1] > 0) {
      network.scaled[abs(network.scaled) < threshold[1]] = 0
    }
    if (threshold[2] < Inf && any(abs(network.scaled) > threshold[2])) {
      to.threshold = abs(network.scaled) > threshold[2]
      network.scaled[to.threshold] = threshold[2]
      network.scaled[to.threshold] = network.scaled[to.threshold] * multiplier.mask[to.threshold]
    }
  }
  network.thickness = abs(network.scaled)
  network.saturation = abs(network.scaled)
  network.opacity = abs(network.scaled)

  network.to.keep = (network != 0) * 1
  if (scale.weights == T) {
    # NOTE(review): this recomputes network.scaled from the raw network, so any
    # thresholding applied above is discarded when scale.weights is TRUE.
    network.scaled = network * (1 / max(abs(network)))
    network.thickness = scales::rescale(x = abs(network.scaled), to = scale.range, from = thickness)
  }
  network.scaled = network.scaled * network.to.keep
  network.thickness = network.thickness * network.to.keep

  network.saturation = scales::rescale(x = abs(network.scaled), to = scale.range, from = saturation)
  network.opacity = scales::rescale(x = abs(network.scaled), to = scale.range, from = opacity)

  pos.inds = as.numeric(which(network.scaled >=0))
  neg.inds = as.numeric(which(network.scaled < 0))

  colors.hsv = rgb2hsv(col2rgb(colors))

  # With a single colour supplied, derive a second one by shifting the hue by
  # 0.5 (wrapping around 1) and reusing the saturation and value.
  if (ncol(colors.hsv) == 1) {
    colors.hsv[[4]] = colors.hsv[1] + 0.5
    if (colors.hsv[4] > 1) {
      colors.hsv[4] = colors.hsv[4] - 1
    }

    colors.hsv[[5]] = colors.hsv[2]
    colors.hsv[[6]] = colors.hsv[3]
    dim(colors.hsv) = c(3,2)
  }

  # Build one edge shape per connection and accumulate each edge's thickness
  # onto the weight of both of its end nodes.
  mat = as.matrix(adjacency.key)
  for (i in seq_along(network)) {
    v0 <- nodes[node.rows==mat[1,i], ]
    v1 <- nodes[node.rows==mat[2,i], ]
    nodes[node.rows==mat[1,i],]$weight = nodes[node.rows==mat[1,i],]$weight + abs(network.thickness[i])
    nodes[node.rows==mat[2,i],]$weight = nodes[node.rows==mat[2,i],]$weight + abs(network.thickness[i])

    color = NULL
    if (i %in% pos.inds) {
      color = colors.hsv[,1]
    } else {
      color = colors.hsv[,2]
    }
    color[2] = network.saturation[i]

    edge_shape = list(
      type = "line",
      opacity = network.opacity[i],
      nodes = c(mat[,i]),
      line = list(
        name = "test",
        color= hsv(color[1],color[2],color[3]),
        width= abs(network.thickness[i]) * enaplot$get("multiplier"),
        dash = edge_type
      ),
      x0 = as.numeric(v0[1]),
      y0 = as.numeric(v0[2]),
      x1 = as.numeric(v1[1]),
      y1 = as.numeric(v1[2]),
      layer = "below",
      size = as.numeric(abs(network.scaled[i]))
    )
    network.edges.shapes[[i]] = edge_shape
  }

  # Order the edge shapes by size: descending when thin.lines.in.front is TRUE,
  # ascending otherwise.
  if (thin.lines.in.front) {
    network.edges.shapes = network.edges.shapes[rev(order(sapply(network.edges.shapes, "[[", "size")))]
  } else {
    network.edges.shapes = network.edges.shapes[order(sapply(network.edges.shapes, "[[", "size"))]
  }

  rows.to.keep = rep(T, nrow(nodes))
  if (show.all.nodes == F) {
    rows.to.keep = nodes$weight != 0
  }
  nodes = nodes[rows.to.keep,]
  mode = "markers+text"
  if (!is.null(args$labels.hide) && args$labels.hide == T) {
    mode="markers"
  }
  if (any(nodes$weight > 0)) {
    nodes$weight = scales::rescale((nodes$weight * (1 / max(abs(nodes$weight)))), node.size) # * enaplot$get("multiplier"));
  } else {
    nodes$weight = node.size[2]
  }

  show.legend = !is.null(legend.name)
  if (legend.include.edges) {
    if (is.null(legend.name)) {
      legend.name = "Nodes"
    }
    show.legend = T
  }

  # Give the layer helpers access to every local computed above.
  environment(plot_nodes) <- environment()
  environment(plot_edges) <- environment()

  for (layer in layers) {
    enaplot$plot <- do.call(what = paste0("plot_", layer), args = list())
  }

  enaplot
}

#' Find metadata columns
#'
#' @param x data.table (or frame) to search for columns of class ena.metadata
#'
#' @return logical vector with one element per column of `x`, `TRUE` where the
#'   column is of class ena.metadata
#' @export
find_meta_cols <- function(x) {
  is_metadata <- function(column) {
    is(column, class2 = "ena.metadata")
  }
  sapply(x, is_metadata)
}
10
#' Find code columns
#'
#' A column counts as a code column when its name matches the pattern
#' "adjacency.code" or when it is of class ena.co.occurrence.
#'
#' @param x data.table (or frame) to search for columns of class ena.co.occurrence
#'
#' @return logical vector with one element per column of `x`
#' @export
find_code_cols <- function(x) {
  matches_by_name <- grepl("adjacency.code", x = names(x))
  matches_by_class <- sapply(x, function(column) {
    is(column, class2 = "ena.co.occurrence")
  })
  matches_by_name | matches_by_class
}
22
#' Find Binary Columns
#'
#' Identifies columns in a data.frame or data.table that are binary (i.e.,
#' numeric columns containing only the values 0 and 1), optionally including
#' logical columns.
#'
#' @param x A data.frame or data.table to search for binary columns.
#' @param include_logical Logical. If TRUE, logical columns are also considered binary. Default is FALSE.
#'
#' @return A character vector of column names that are binary, or NULL if none
#'   are found (including when `x` has no columns).
#' @export
#'
#' @examples
#' df <- data.frame(a = c(0, 1, 1), b = c(TRUE, FALSE, TRUE), c = c(1, 2, 3))
#' find_binary_cols(df)
#' find_binary_cols(df, include_logical = TRUE)
find_binary_cols <- function(x, include_logical = FALSE) {
  # vapply() always yields a logical vector, even for a zero-column input where
  # sapply() would return list() and break the subscript below. isTRUE() turns
  # an undetermined (NA) result into FALSE so NA never leaks into the names.
  is_binary <- vapply(
    x,
    function(col) isTRUE(is_binary_col(col, include_logical)),
    logical(1)
  )
  nm <- colnames(x)[is_binary]
  if (length(nm) > 0) nm else NULL
}
41
#' Find dimension columns
#'
#' @param x data.table (or frame) to search for columns of class ena.dimension
#'
#' @return logical vector with one element per column of `x`, `TRUE` where the
#'   column is of class ena.dimension
#' @export
find_dimension_cols <- function(x) {
  is_dimension <- function(column) {
    is(column, class2 = "ena.dimension")
  }
  sapply(x, is_dimension)
}
51
#' Remove meta columns from data.table
#'
#' @param x data.table (or frame) from which the columns flagged by
#'   `find_meta_cols()` are dropped
#'
#' @return data.frame with the columns of class ena.metadata removed
#' @export
remove_meta_data <- function(x) {
  as_frame <- as.data.frame(x)
  keep <- !find_meta_cols(x)
  # drop = FALSE keeps a data.frame even when a single column remains
  as_frame[, keep, drop = FALSE]
}
61
#' Extract metadata easily
#'
#' `$` method for ena.metadata columns. The text of the calling expression
#' (for example `set$meta.data$Condition`) is split on `$`; the first piece is
#' looked up as an object in the caller's frame, the second piece is taken as
#' an element of that object, and the rows of that element where `x == i` are
#' returned.
#'
#' @param x ena.metadata column the `$` operator was applied to
#' @param i value of `x` to select rows by
#'
#' @return the rows of the parent element for which `x == i`
#' @export
"$.ena.metadata" <- function(x, i) {
  # sys.call() must be evaluated directly in this frame: the deparsed first
  # argument is the only record of where `x` came from.
  call_text <- as.character(sys.call())[2]
  pieces <- unlist(strsplit(x = call_text, split = "\\$"))
  owner_name <- pieces[1]
  element_name <- pieces[2]

  owner <- get(owner_name, envir = parent.frame())
  owner[[element_name]][x == i, ]
}
79
#' Extract line.weights easily
#'
#' @param x line.weights object
#' @param i name of the column to extract
#'
#' @return the column of `x` whose name equals `i`
#' @export
"$.line.weights" <- function (x, i) {
  column_index <- which(colnames(x) == i)
  x[[column_index]]
}
92
#' Extract points easily
#'
#' @param x ena.points object
#' @param i name of the column to extract
#'
#' @return the column of `x` whose name equals `i`
#' @export
"$.ena.points" <- function (x, i) {
  column_index <- which(colnames(x) == i)
  x[[column_index]]
}
105
#' Extract from ena.matrix easily using metadata
#'
#' @param x ena.matrix object
#' @param i name of the column to extract
#'
#' @return the column of `x` whose name equals `i`
#' @export
"$.ena.matrix" <- function (x, i) {
  column_index <- which(colnames(x) == i)
  x[[column_index]]
}
118
#' Multiply ena.matrix objects
#'
#' Element-wise multiplication of the dimension columns of an ena.matrix by
#' another ena.matrix or numeric matrix. If e2 is an ena.matrix, it is converted
#' to a standard matrix before multiplication. Only the dimension columns of e1
#' are multiplied; other columns remain unchanged.
#'
#' @param e1 An ena.matrix object whose dimension columns will be multiplied.
#' @param e2 An ena.matrix or numeric matrix to multiply with the dimension
#'   columns of e1.
#'
#' @return An ena.matrix object with the dimension columns of e1 multiplied by e2.
#' @exportS3Method "*" ena.matrix
"*.ena.matrix" <- function (e1, e2) {
  e2m <- if (is(e2, "ena.matrix")) as.matrix(e2) else e2

  dim_cols <- colnames(e1)[find_dimension_cols(e1)]
  # Pair each dimension column of e1 with the matching column of e2m and
  # assign the products back by reference.
  e1[, (dim_cols) := Map(function(values, multiplier) values * multiplier, .SD, as.data.frame(e2m)), .SDcols = dim_cols]
}
140
141		# "$.ena.plot" <- function(x, i) {
142		# browser()
143		# }
144		# "[[.ena.plot" <- function(x, i) {
145		# browser()
146		# }
#' @export
.DollarNames.ena.metadata <- function(x, pattern = "") {
  # Offer the distinct values of the metadata column; `pattern` is not used.
  candidates <- unique(x)
  candidates
}
151
152		# "[.ena.matrix" = function(x, ...)
153		# {
154		# browser()
155		# original.class = class(x)[1]
156		# class(x) = class(x)[-1]
157		# x = x[...]
158		#
159		# # y = as.data.frame(x)
160		# }
161
#' @export
summary.ena.set <- function(object, ...) {
  x <- object

  # Header row naming the first `n` dimensions.
  dimension_header <- function(n = 2) {
    cat("\t", paste("Dimension", 1:n, collapse = "\t"), "\n")
  }
  # One row with the first two entries of `values`, rounded to 3 digits.
  first_two <- function(values) {
    cat("\t", paste(round(values[1:2], 3), collapse = "\t\t"), "\n\n")
  }

  cat("Units: ", nrow(x$points), "\t\t")
  cat("Codes: ", length(x$rotation$codes), "\n")

  cat("Variance: \n")
  dimension_header()
  first_two(x$model$variance)

  cat("Eigenvalues: \n")
  dimension_header()
  first_two(x$rotation$eigenvalues)

  cat("Correlations: \n")
  cors <- ena.correlations(x)
  rownames(cors) <- paste("Dimension", 1:2)
  print(cors)
}
185		# as.data.frame.ena.connections <- function(x) {
186		# class(x) = class(x)[-1]
187		# y = as.data.frame(x)
188		# y
189		# }
190		# format.co.occurrence = format.metadata = function(x, justify = "none") {
191		# y = as.character(x)
192		# format(y, justify = justify)
193		# }
194
#' Print an ena.set
#'
#' Prints the unclassed contents of the set. When the set carries a `_plot_op`
#' flag equal to TRUE only its `plots` element is printed.
#'
#' @param x ena.set to print
#' @param ... not referenced by this method
#' @param plot logical; when FALSE the `plots` element is removed before the
#'   contents are printed
#' @param set not referenced by this method
#'
#' @return `x`, invisibly
#' @export
print.ena.set <- function(x, ..., plot = FALSE, set = TRUE) {
  contents <- unclass(x)
  plot_flag <- contents$`_plot_op`

  if (!is.null(plot_flag) && plot_flag == T) {
    base::print(contents$plots)
    return(invisible(x))
  }

  if (plot == FALSE) {
    contents$plots <- NULL
  }
  base::print(contents)

  invisible(x)
}
222
#' Re-run a model as a trajectory model
#'
#' Calls `ena` again with the parameters stored on `x` (`_function.params`),
#' replacing the model type with the requested trajectory model.
#'
#' @param x ena.set whose stored `_function.params` are reused
#' @param by not referenced by this function
#' @param model trajectory model to build. The historical spelling
#'   "SeperateTrajectory" and the correct "SeparateTrajectory" are both
#'   accepted; the correctly spelled value is what gets forwarded.
#' @param ... named arguments that override the stored parameters
#'
#' @return the result of calling `ena` with the updated parameters
#' @export
as_trajectory <- function(x,
  by = x$`_function.params`$conversation[1],
  model = c("AccumulatedTrajectory", "SeperateTrajectory"),
  ...
) {
  # The declared choices only contain the misspelled "SeperateTrajectory", which
  # the accumulation step's own match.arg() does not recognise. Map the correct
  # spelling onto the declared choice for validation, then forward the correct
  # spelling downstream.
  if (identical(model, "SeparateTrajectory")) {
    model <- "SeperateTrajectory"
  }
  model <- match.arg(model)
  if (identical(model, "SeperateTrajectory")) {
    model <- "SeparateTrajectory"
  }

  orig_args <- x$`_function.params`
  orig_args$model <- model

  # Explicitly supplied arguments win over the stored ones.
  more_args <- list(...)
  for (arg in names(more_args)) {
    orig_args[[arg]] <- more_args[[arg]]
  }

  do.call(ena, orig_args)
}
249
#' Project a set into another rotation
#'
#' Copies the rotation matrix, nodes and eigenvalues of `by` onto `x` and
#' recomputes the points of `x` by multiplying its `points.for.projection` with
#' that rotation matrix.
#'
#' @param x ena.set to project
#' @param by ena.set or ena.rotation.set supplying the rotation
#' @param ... not referenced by this function
#'
#' @return `x` with updated rotation and points, invisibly
#' @export
project_in <- function(x, by = NULL, ...) {
  if (is.null(by)) {
    stop("A second parameter (ena.set or rotation.set) is required")
  }

  rotation.set <- if (is(by, "ena.set")) {
    by$rotation
  } else if (is(by, "ena.rotation.set")) {
    by
  } else {
    NULL
  }

  keys_match <- identical(x$rotation$adjacency.key, rotation.set$adjacency.key)
  if (!keys_match) {
    stop("Rotation sets must have identical adjacency keys")
  }

  x$rotation.matrix <- rotation.set$rotation.matrix
  x$rotation$rotation.matrix <- rotation.set$rotation.matrix
  x$rotation$nodes <- rotation.set$nodes
  x$rotation$eigenvalues <- rotation.set$eigenvalues

  projected <- as.matrix(x$model$points.for.projection) %*% as.matrix(x$rotation.matrix)
  points.dt <- as.data.table(projected)
  # Tag every projected column as an ena.dimension, updating by reference.
  for (col_index in seq(ncol(points.dt))) {
    set(points.dt, j = col_index, value = as.ena.dimension(points.dt[[col_index]]))
  }

  # Trajectory models prepend the trajectory columns, others the metadata.
  is_trajectory <- grepl(x = x$model$model.type, pattern = "Trajectory")
  if (is_trajectory) {
    x$points <- cbind(x$trajectories, points.dt)
  } else {
    x$points <- cbind(x$meta.data, points.dt)
  }
  x$points <- as.ena.matrix(x$points, "ena.points")

  .return(x, invisible = T)
}
293
#' Re-run a model with a means rotation
#'
#' Calls `ena` again with the parameters stored on `x` (`_function.params`),
#' adding `groupVar` and `groups`.
#'
#' @param x ena.set whose stored `_function.params` are reused
#' @param on optional named list/vector; its name is used as the grouping
#'   variable and its value as the groups. When NULL, the unit column with the
#'   fewest unique values in the raw input is used and its first two
#'   values (or levels) become the groups.
#'
#' @return the newly created set, carrying over the plots of `x`, invisibly
#' @export
means_rotate <- function(x, on = NULL) {
  groupVar = NULL
  groups = NULL
  if (is.null(on)) {
    # Number of distinct values in each unit column of the raw input.
    col_counts = as.numeric(x$model$raw.input[, lapply(.SD, function(s) {
        length(unique(s))
      }),
      .SDcols = c(x$`_function.params`$units)
    ])
    # Pick the unit column with the fewest distinct values. The previous
    # `order(col_counts) == 1` mask only coincides with the minimum for some
    # orderings (e.g. counts 5, 9, 2 selected the column with 9).
    groupVar = x$`_function.params`$units[which.min(col_counts)]
    group_vars = unique(x$model$raw.input[[groupVar]])
    if (!is.null(levels(group_vars))) {
      groups = levels(group_vars)[1:2]
    } else {
      groups = group_vars[1:2]
    }
  } else if (!is.null(names(on))) {
    groupVar = names(on)
    groups = on[[groupVar]]
  }

  if (is.null(groupVar) || is.null(groups)) {
    stop("Unable to determine groups for rotation.")
  }

  orig_args <- x$`_function.params`
  orig_args$groupVar = groupVar
  orig_args$groups = groups
  new_set <- do.call(ena, orig_args)
  new_set$plots <- x$plots
  invisible(new_set)
}
338
# Internal helper: record on `x` whether it comes from a plotting operation
# (`_plot_op`) and return it, invisibly unless `invisible` is not TRUE.
.return <- function(x, invisible = T, from_plot = F) {
  x$`_plot_op` = from_plot

  if (invisible != T) {
    return(x)
  }
  invisible(x)
}
352
# Internal predicate: TRUE when `col` is a logical vector without missing
# values, FALSE otherwise. Always returns a single TRUE/FALSE.
is_logical_col <- function(col) {
  is.logical(col) && !anyNA(col)
}

# Internal predicate: TRUE when `col` is numeric and every value is 0 or 1, or,
# when `include_logical` is TRUE, when it is a logical column as defined by
# is_logical_col(). Columns containing NA are reported as FALSE rather than
# NA so callers can use the result directly as a subscript or condition.
is_binary_col <- function(col, include_logical = TRUE) {
  is_col <- is.numeric(col) &&
    isTRUE(all(is.wholenumber(col) & (col == 1 | col == 0)))

  if (isTRUE(include_logical)) {
    is_col <- is_col || is_logical_col(col)
  }

  is_col
}

# Elementwise test for values that are whole numbers within tolerance `tol`.
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol
372
373
374		#' Extract points easily
375		#'
376		# @param x [TBD]
377		# @param i [TBD]
378		# @param j [TBD]
379		# @param ... Passed to `[.data.table`
380		# @param with.meta logical, currently defaults to TRUE, which includes the metadata columns.
381		#
382		# @return [TBD]
383		# @export
384		# "[.ena.matrix" <- function (x, i, j, by, keyby, ..., with.meta = TRUE) {
385		# orig.class <- class(x)
386		# x.unclass <- data.table::as.data.table(unclass(x))
387		#
388		# if(with.meta == FALSE) {
389		# x.nometa <- x.unclass[, !find_meta_cols(x.unclass), with = F]
390		# x_ <- x.nometa[i, ..j, ...]
391		# }
392		# else {
393		# x_ <- x.unclass[i, j, by = by, keyby = keyby, ...]
394		# # if (!is.null(j)) {
395		# # x_ <- x_[, ..j]
396		# # }
397		# }
398		# class(x_) <- orig.class
399		# x_
400		# }
401

1		##
2		#' @title Accumulate data from a data frame into a set of adjacency (co-occurrence) vectors
3		#'
4		#' @description This function initializes an ENAdata object, processing conversations from coded data to generate adjacency (co-occurrence) vectors
5		#'
6		#' @details ENAData objects are created using this function. This accumulation receives
7		#' separate data frames for units, codes, conversation, and optionally, metadata. It
8		#' iterates through the data to create an adjacency (co-occurrence) vector corresponding
9		#' to each unit - or in a trajectory model multiple adjacency (co-occurrence) vectors for
10		#' each unit.
11		#'
12		#' In the default MovingStanzaWindow model, co-occurrences between codes are
13		#' calculated for each line k in the data between line k and the window.size.back-1 previous
14		#' lines and window.size.forward-1 subsequent lines in the same conversation as line k.
15		#'
16		#' In the Conversation model, co-occurrences between codes are calculated across all lines in
17		#' each conversation. Adjacency (co-occurrence) vectors are constructed for each unit u by
18		#' summing the co-occurrences for the lines that correspond to u.
19		#'
20		#' Options for how the data is accumulated are endpoint, which produces one adjacency (co-occurrence)
21		#' vector for each unit by summing the co-occurrences for all lines, and two trajectory models:
22		#' AccumulatedTrajectory and SeparateTrajectory. Trajectory models produce an adjacency
23		#' (co-occurrence) model for each conversation for each unit. In a SeparateTrajectory model,
24		#' each conversation is modeled as a separate network. In an AccumulatedTrajectory model, the
25		#' adjacency (co-occurrence) vector for the current conversation includes the co-occurrences
26		#' from all previous conversations in the data.
27		#'
28		#' @export
29		#'
30		#' @param units A data frame where the columns are the properties by which units will be identified
31		#' @param conversation A data frame where the columns are the properties by which conversations will be identified
32		#' @param codes A data frame where the columns are the codes used to create adjacency (co-occurrence) vectors
33		#' @param metadata (optional) A data frame with additional columns of metadata to be associated with each unit in the data
34		#' @param model A character, choices: EndPoint (or E), AccumulatedTrajectory (or A), or SeparateTrajectory (or S); default: EndPoint. Determines the ENA model to be constructed
35		#' @param weight.by (optional) A function to apply to values after accumulation
36		#' @param mask (optional) A binary matrix of size ncol(codes) x ncol(codes). 0s in the mask matrix row i column j indicates that co-occurrence will not be modeled between code i and code j
37		#' @param window A character, choices are Conversation (or C), MovingStanzaWindow (MSW, MS); default MovingStanzaWindow. Determines how stanzas are constructed, which defines how co-occurrences are modeled
38		#' @param window.size.back A positive integer, Inf, or character (INF or Infinite), default: 1. Determines, for each line in the data frame, the number of previous lines in a conversation to include in the stanza window, which defines how co-occurrences are modeled
39		#' @param window.size.forward (optional) A positive integer, Inf, or character (INF or Infinite), default: 0. Determines, for each line in the data frame, the number of subsequent lines in a conversation to include in the stanza window, which defines how co-occurrences are modeled
40		#' @param ... additional parameters addressed in inner function
41		#' @param include.meta Logical indicating if unit metadata should be attached to the resulting ENAdata object, default is TRUE
42		#' @param as.list R6 objects will be deprecated, but if this is TRUE, the original R6 object will be returned, otherwise a list with class `ena.set`
43		#'
44		#' @seealso \code{\link{ENAdata}}, \code{\link{ena.make.set}}
45		#'
46		#' @return \code{\link{ENAdata}} object with data [adjacency (co-occurrence) vectors] accumulated from the provided data frames.
47		#'
48		##
ena.accumulate.data <- function(
  units = NULL,
  conversation = NULL,
  codes = NULL,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  mask = NULL,
  include.meta = T,
  as.list = T,
  ...
) {
  # Validate the three required inputs before combining them.
  required_missing <- is.null(units) || is.null(conversation) || is.null(codes)
  if (required_missing) {
    stop("Accumulation requires: units, conversation, and codes")
  }
  rows_differ <- nrow(units) != nrow(conversation) || nrow(conversation) != nrow(codes)
  if (rows_differ) {
    stop("Data Frames do not have the same number of rows")
  }

  # Combine units, conversation and codes column-wise; metadata is appended
  # only when it has one row per row of the combined data.
  df <- cbind(cbind(units, conversation), codes)

  metadata <- data.table::as.data.table(metadata)
  if (!is.null(metadata) && nrow(metadata) == nrow(df)) {
    df <- cbind(df, metadata)
  }

  model <- match.arg(model)
  window <- match.arg(window)

  units.by <- colnames(units)
  conversations.by <- colnames(conversation)
  switch(window,
    Conversation = {
      # Whole-conversation window: units join the conversation grouping and the
      # window name itself is passed on as the back window size.
      conversations.by <- c(conversations.by, units.by)
      window.size.back <- window
    },
    MovingStanzaWindow = {
      # Accept character spellings of infinity (e.g. "INF", "Infinite").
      if (grepl(pattern = "inf", x = window.size.back, ignore.case = T)) {
        window.size.back <- Inf
      }
      if (grepl(pattern = "inf", x = window.size.forward, ignore.case = T)) {
        window.size.forward <- Inf
      }
    }
  )

  accumulated <- ENAdata$new(
    file = df,
    units = units,
    units.by = units.by,
    conversations.by = conversations.by,
    codes = codes,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    mask = mask,
    include.meta = include.meta,
    ...
  )
  accumulated$process()

  # Record the call that produced this accumulation.
  accumulated$function.call <- sys.call()

  if (!as.list) {
    warning(paste0("Usage of R6 data objects is deprecated and may be removed ",
      "entirely in a future version. Consider upgrading to the new data ",
      "object."))
  } else {
    accumulated <- ena.set(accumulated)
  }

  accumulated
}

#' Connection counts as square matrix
#'
#' @param x ena.set or ena.connections (i.e. set$connection.counts)
#'
#' @return the connections converted with `as.matrix(square = TRUE)`. When
#'   there is more than one row and the conversion yields a list, its elements
#'   are named by `ENA_UNIT`.
#' @export
connection.matrix <- function(x) {
  connections <- if (is(x, "ena.set")) x$connection.counts else x

  if (!is(connections, "ena.connections")) {
    stop("Unable to find connections. `x` must be connections from an ena.set or an ena.set")
  }

  # A single row is simplified during conversion.
  single_row <- (nrow(connections) == 1)
  cm <- as.matrix(connections, square = T, simplify = single_row)
  if (single_row == FALSE && is.list(cm)) {
    names(cm) <- connections$ENA_UNIT
  }

  return(cm)
}

1		###
2		#' Generate a methods writeup
3		#'
4		#' @description Render the methods writeup (the package's methods.rmd
5		#' template) for the provided ENAset
6		#'
7		#' @param enaset ENAset to view methods of
8		#' @param tool c("rENA","webENA")
9		#' @param tool.version as.character(packageVersion(tool))
10		#' @param comparison character string representing the comparison used, c(NULL, "parametric", "non-parametric"). Default NULL
11		#' @param comparison.groups Groups that were used for the comparison
12		#' @param sig.dig Integer for the number of digits to round to
13		#' @param output_dir Where to save the output file
14		#' @param type c("file","stream") File will save to a file in output_dir, Stream returns the contents directly
15		#' @param theory Logical indicating whether to include theory in the writeup
16		#' @param methods Logical indicating whether to include methods in the writeup
17		#' @param params additional parameters for rmarkdown::render
18		#' @param output_file character
19		#' @param output_format character
20		#'
21		#' @export
22		#'
23		#' @return String representing the methods used to generate the model
ena.writeup <- function(
  enaset,
  tool = "rENA", tool.version = as.character(packageVersion(tool)),
  comparison = NULL, comparison.groups = NULL, sig.dig = 2,
  output_dir = getwd(), type = c("file","stream"), theory = T, methods = T,
  params = NULL, output_file = NULL, output_format = NULL
) {
  # NOTE(review): render() is called without `envir`, so the template is
  # presumably evaluated against this frame; local names are left unchanged
  # for that reason. Confirm against methods.rmd before renaming anything.

  # Fall back to the weight.by stored under `args` when the top-level entry is
  # absent.
  if (is.null(enaset$`_function.params`$weight.by)) {
    enaset$`_function.params`$weight.by <- enaset$`_function.params`$args$weight.by
  }

  type = match.arg(type, choices = c("file","stream"), several.ok = FALSE)

  # File output is always rendered as a Word document.
  if (type == "file") {
    output_format = "word_document"
  }

  file = rmarkdown::render(
    system.file("rmd","methods.rmd", package="rENA"),
    output_dir = output_dir,
    knit_root_dir = output_dir,
    intermediates_dir = output_dir,
    quiet = TRUE,
    params = params,
    output_file = output_file,
    output_format = output_format
  )

  # "file" returns the rendered path; "stream" returns the file contents, but
  # only when the rendered file ends in ".plain".
  if (type == "file") {
    return(file)
  }
  if (type == "stream" && endsWith(file, ".plain")) {
    return(readChar(file, file.info(file)$size))
  }
  invisible(NULL)
}
52
#' @title methods_report
#' @description Methods report output format for rmarkdown, converting to docx
#' @param toc passed to `rmarkdown::pandoc_toc_args()` together with `toc_depth`
#' @param toc_depth passed to `rmarkdown::pandoc_toc_args()` together with `toc`
#' @param fig_width value for the knitr chunk option `fig.width`
#' @param fig_height value for the knitr chunk option `fig.height`
#' @param keep_md passed to `rmarkdown::output_format()`
#' @param md_extensions passed to `rmarkdown::from_rmarkdown()` as `extensions`
#' @param pandoc_args additional arguments appended to the pandoc arguments
#'
#' @export
methods_report <- function(toc = FALSE,
                           toc_depth = 3,
                           fig_width = 5,
                           fig_height = 4,
                           keep_md = FALSE,
                           md_extensions = NULL,
                           pandoc_args = NULL) {

  # knitr chunk defaults: PNG figures at 96 dpi with the requested size
  chunk_defaults <- list(
    dev = 'png',
    dpi = 96,
    fig.width = fig_width,
    fig.height = fig_height
  )
  knitr <- rmarkdown::knitr_options(opts_chunk = chunk_defaults)

  # pandoc arguments: standalone output, table of contents, then caller extras
  args <- c(
    "--standalone",
    rmarkdown::pandoc_toc_args(toc, toc_depth),
    pandoc_args
  )

  pandoc <- rmarkdown::pandoc_options(
    to = "docx",
    from = rmarkdown::from_rmarkdown(extensions = md_extensions),
    args = args
  )

  # return output format
  rmarkdown::output_format(
    knitr = knitr,
    pandoc = pandoc,
    keep_md = keep_md
  )
}
116
#' @title methods_report_stream
#' @description Methods report output format for rmarkdown, converting to
#'   plain text
#' @param toc passed to `rmarkdown::pandoc_toc_args()` together with `toc_depth`
#' @param toc_depth passed to `rmarkdown::pandoc_toc_args()` together with `toc`
#' @param fig_width value for the knitr chunk option `fig.width`
#' @param fig_height value for the knitr chunk option `fig.height`
#' @param keep_md passed to `rmarkdown::output_format()`
#' @param md_extensions passed to `rmarkdown::from_rmarkdown()` as `extensions`
#' @param pandoc_args additional arguments appended to the pandoc arguments
#'
#' @export
methods_report_stream <- function(toc = FALSE,
                                  toc_depth = 3,
                                  fig_width = 5,
                                  fig_height = 4,
                                  keep_md = FALSE,
                                  md_extensions = NULL,
                                  pandoc_args = NULL) {

  # knitr chunk defaults: PNG figures at 96 dpi with the requested size
  chunk_defaults <- list(
    dev = 'png',
    dpi = 96,
    fig.width = fig_width,
    fig.height = fig_height
  )
  knitr <- rmarkdown::knitr_options(opts_chunk = chunk_defaults)

  # pandoc arguments: standalone output, table of contents, then caller extras
  args <- c(
    "--standalone",
    rmarkdown::pandoc_toc_args(toc, toc_depth),
    pandoc_args
  )

  pandoc <- rmarkdown::pandoc_options(
    to = "plain",
    from = rmarkdown::from_rmarkdown(extensions = md_extensions),
    args = args
  )

  # return output format
  rmarkdown::output_format(
    knitr = knitr,
    pandoc = pandoc,
    keep_md = keep_md
  )
}

1		#####
2		#' @title Wrapper to generate plots of units, groups, and networks
3		#'
4		#' @description Plots individual units, all units, groups of units, networks, and network subtractions
5		#'
6		#' @details This function includes options to plots individual units, all units,
7		#' groups of units, networks, and network subtractions, given an ena.set objects. Plots are stored
8		#' on the supplied ena.set object.
9		#'
10		#'
11		#' @param set an ena.set object
12		#' @param groupVar vector, character, of column name containing group identifiers.
13		#' @param groups vector, character, of values of groupVar column you wish to plot. Maximum of two groups allowed.
14		#' @param points logical, TRUE will plot points (default: FALSE)
15		#' @param mean logical, TRUE will plot the mean position of the groups defined in the groups argument (default: FALSE)
16		#' @param network logical, TRUE will plot networks (default: TRUE)
17		#' @param networkMultiplier numeric, scaling factor for non-subtracted networks (default: 1)
18		#' @param subtractionMultiplier numeric, scaling factor for subtracted networks (default: 1)
19		#' @param unit vector, character, name of a single unit to plot
20		#' @param colors vector, character, of colors for groups or points. For two-group models supply two values (group1, group2); for single-group or no-group models supply one value. Defaults to "blue"/"red" for two groups and "black" otherwise.
21		#' @param confidence.interval character, style of confidence interval shown on mean points: "box" (default), "crosshairs", or "none"
22		#' @param print.plots logical, TRUE will show plots in the Viewer (default: FALSE)
23		#' @param ... Additional parameters passed to set creation and plotting functions
24		#' @export
25		#' @return ena.set object
26		#####
ena.plotter <- function(
    set,
    groupVar = NULL,
    groups = NULL,
    points = FALSE,
    mean = FALSE,
    network = TRUE,
    networkMultiplier = 1,
    subtractionMultiplier = 1,
    unit = NULL,
    colors = NULL,
    confidence.interval = "box",
    print.plots = FALSE,
    ...
) {
  # Dispatches on the arguments, in this order:
  #   1. `unit` given            -> points + network of that single unit
  #   2. no `groupVar`           -> mean network / points / mean of all units
  #   3. `groupVar`, no `groups` -> one group if the column has a single
  #                                 value, otherwise its first two values
  #   4. one value in `groups`   -> that group only
  #   5. two or more values      -> subtraction plot of the first two
  # Every branch appends the new plot(s) to `set$plots` and returns `set`.
  data <- set$connection.counts

  # Colour used whenever only one thing is drawn.
  single_color <- if (length(colors) >= 1) colors[1] else "black"

  # Shared by the "all units" and "single group" cases: draws the mean
  # network, the points and/or the group mean for `rows` (NULL = every row).
  plot_rows <- function(title, label, rows = NULL) {
    if (!any(network == TRUE, points == TRUE, mean == TRUE)) {
      stop("You must set at least one of points, mean, or network to TRUE to obtain a plot.")
    }
    pick <- function(m) if (is.null(rows)) m else m[rows, , drop = FALSE]

    plot <- ena.plot(enaset = set, title = title)
    if (network == TRUE) {
      mean_lw <- colMeans(pick(as.matrix(set$line.weights))) * networkMultiplier
      plot <- ena.plot.network(plot, network = mean_lw, colors = single_color)
    }
    if (points == TRUE) {
      plot <- ena.plot.points(
        enaplot = plot,
        points = pick(as.matrix(set$points)),
        colors = single_color
      )
    }
    if (mean == TRUE) {
      plot <- ena.plot.group(
        plot,
        pick(as.matrix(set$points)),
        colors = single_color,
        labels = label,
        confidence.interval = confidence.interval
      )
    }

    set$plots[[length(set$plots) + 1]] <- plot
    if (print.plots == TRUE) {
      print(set$plots)
    }
    set
  }

  # Shared by both two-group cases: delegates to ena.plot.subtraction.
  plot_pair <- function(group1, group2, ...) {
    set <- ena.plot.subtraction(
      set = set,
      groupVar = groupVar,
      group1 = group1,
      group2 = group2,
      points = points,
      mean = mean,
      network = network,
      networkMultiplier = networkMultiplier,
      subtractionMultiplier = subtractionMultiplier,
      group1.color = if (length(colors) >= 1) colors[1] else "blue",
      # fall back to the default when only one colour was supplied
      group2.color = if (length(colors) >= 2) colors[2] else "red",
      confidence.interval = confidence.interval,
      ...
    )
    if (print.plots == TRUE) {
      print(set$plots)
    }
    set
  }

  # --- 1. single unit ---
  if (!is.null(unit)) {
    unit_rows <- set$points$ENA_UNIT == unit
    if (!any(unit_rows)) {
      stop("Unit does not exist!")
    }

    plot <- ena.plot(enaset = set, title = unit)
    unit_points <- as.matrix(set$points)[unit_rows, ]
    unit_lw <- as.matrix(set$line.weights)[unit_rows, ] * networkMultiplier

    plot <- ena.plot.points(enaplot = plot, points = unit_points, colors = single_color)
    plot <- ena.plot.network(enaplot = plot, network = unit_lw, colors = single_color)

    set$plots[[length(set$plots) + 1]] <- plot
    if (print.plots == TRUE) {
      # only the plot that was just added is shown in this branch
      print(set$plots[[length(set$plots)]])
    }
    return(set)
  }

  # --- 2. all units ---
  if (is.null(groupVar)) {
    return(plot_rows(title = "All Units", label = "Mean"))
  }

  # --- 3. group variable without explicit groups ---
  if (length(groups) == 0) {
    unique.groups <- unique(data[[groupVar]])

    if (length(unique.groups) == 1) {
      warning("No groups specified and group variable only contains one unique value. Generating plot for one group.")
      return(plot_rows(
        title = unique.groups,
        label = unique.groups,
        rows = set$points[[groupVar]] == unique.groups
      ))
    }

    group1 <- unique.groups[1]
    group2 <- unique.groups[2]
    warning(paste0("No groups specified. Generating plots of first two unique values of group variable: ", group1, " and ", group2))
    return(plot_pair(group1, group2, ...))
  }

  # --- 4. exactly one group ---
  if (length(groups) == 1) {
    if (!any(data[[groupVar]] == groups)) {
      stop("Group column does not contain group1 value!")
    }
    return(plot_rows(
      title = groups,
      label = groups,
      rows = set$points[[groupVar]] == groups
    ))
  }

  # --- 5. two or more groups ---
  if (length(groups) > 2) {
    # collapse so that a single warning message is produced
    warning(paste0(
      "More than two groups specified. Plotting the first two groups: ",
      paste(groups[1:2], collapse = " and ")
    ))
  }

  groups.missing <- groups[!groups %in% data[[groupVar]]]
  if (length(groups.missing) > 0) {
    stop(paste0(
      "Group column does not contain group value(s): ",
      paste(groups.missing, collapse = ", ")
    ))
  }

  plot_pair(groups[1], groups[2], ...)
}
243

1		####
2		#' @title ENAset R6class
3		#'
4		#' @docType class
5		#' @importFrom R6 R6Class
6		#' @import data.table
7		#' @export
8
9		#' @field enadata An \code{\link{ENAdata}} object originally used to create the set
10		#' @field points.raw A data frame containing accumulated adjacency (co-occurrence) vectors per unit
11		#' @field points.normed.centered A data frame of centered normed accumulated adjacency (co-occurrence) vectors for each unit
12		#' @field points.rotated A data frame of point positions for number of dimensions specified in ena.make.set (i.e., the centered, normed, and rotated data)
13		#' @field line.weights A data frame of connections strengths per unit (Data frame of normed accumulated adjacency (co-occurrence) vectors for each unit)
14		#' @field node.positions - A data frame of positions for each code
15		#' @field codes - A vector of code names
16		#' @field rotation.set - An \code{\link{ENARotationSet}} object
17		#' @field variance - A vector of variance accounted for by each dimension specified
18		#' @field centroids - A matrix of the calculated centroid positions
19		#' @field function.call - The string representation of function called
20		#' @field function.params - A list of all parameters sent to function call
21		#' @field rotation_dists TBD
22		#' @field points.rotated.scaled TBD
23		#' @field points.rotated.non.zero TBD
24		#' @field line.weights.unrotated TBD
25		#' @field line.weights.non.zero TBD
26		#' @field correlations A data frame of spearman and pearson correlations for each dimension specified
27		#' @field center.align.to.origin - align point and centroid centers to origin
28		####
ENAset <- R6::R6Class("ENAset",
  public = list(

    ## Public Functions ----
    #' Create ENAset
    #'
    #' Stores the supplied options on the object; nothing is computed until
    #' `process()` is called.
    #'
    #' @param enadata TBD
    #' @param dimensions TBD
    #' @param norm.by TBD
    #' @param rotation.by TBD
    #' @param rotation.params TBD
    #' @param rotation.set TBD
    #' @param node.position.method TBD
    #' @param endpoints.only TBD
    #' @param center.align.to.origin TBD
    #' @param ... TBD
    #'
    #' @return ENAset
    initialize = function(
      enadata,
      dimensions = 2,

      norm.by = fun_sphere_norm,

      rotation.by = ena.svd.R6,
      rotation.params = NULL,
      rotation.set = NULL,

      #center.data = center_data_c, ### made local to run
      node.position.method = lws.positions.sq.R6,
      endpoints.only = TRUE,
      center.align.to.origin = TRUE,
      ...
    ) {
      self$enadata <- enadata

      private$dimensions <- dimensions

      self$codes <- enadata$codes

      self$function.call <- sys.call(-1)

      # run() reads the centering switch from this field (and copies it back
      # into function.params), so the constructor argument has to land here
      # or it would be silently ignored.
      self$center.align.to.origin <- center.align.to.origin

      self$function.params$norm.by <- norm.by #was sphere_norm
      #self$function.params$center.data <- center.data;
      self$function.params$node.position.method <- node.position.method #was position.method
      self$function.params$rotation.by <- rotation.by
      self$function.params$rotation.params <- rotation.params
      self$function.params$rotation.set <- rotation.set
      self$function.params$endpoints.only <- endpoints.only
      self$function.params$center.align.to.origin <- center.align.to.origin
      private$args <- list(...)
    },

    #' Process ENAset
    #'
    #' @return ENASet
    process = function() {
      return(private$run())
    },

    #' Get property from object
    #'
    #' Looks `x` up among the private members only.
    #'
    #' @param x character key to retrieve from object
    #' @return value from object at x
    get = function(x = "enadata") {
      return(private[[x]])
    },

    ## Public Properties ----
    rotation_dists = NULL, #leave for now - to be removed for a temp variable
    enadata = NULL,
    points.raw = NULL, #was data$raw
    points.normed.centered = NULL, #was data$centered$normed
    points.rotated = NULL, #was data$centered$rotated
    points.rotated.scaled = NULL,
    points.rotated.non.zero = NULL,
    line.weights = NULL, #was data$normed
    line.weights.non.zero = NULL,
    line.weights.unrotated = NULL,
    node.positions = NULL, #was nodes$positions$scaled
    codes = NULL,
    rotation.set = NULL, ## new - ENARotation object
    correlations = NULL, #not formerly listed, comes from optimized node positions in egr.positions
    variance = NULL, #was self$data$centered$latent
    centroids = NULL,
    center.align.to.origin = TRUE,
    function.call = NULL, #new - string representing the function call
    function.params = list( #list containing parameters function was called with
      norm.by = NULL,
      node.position.method = NULL,
      rotation.by = NULL,
      rotation.params = NULL,
      endpoints.only = NULL,
      center.align.to.origin = TRUE
    )
  ),

  private = list(

    ## Private Properties ----
    args = NULL,
    data.original = NULL,
    optim = NULL,

    #moved from public
    dimensions = 2,

    ## Private Functions ----
    # Runs the whole pipeline: normalise -> centre -> rotate -> position the
    # nodes -> variance per dimension. Returns `self`.
    run = function() {
      df <- self$enadata$adjacency.vectors

      # Backup of ENA data, this is not touched again.
      #private$data.original = df[,grep("adjacency.code", colnames(df)), with=F];
      private$data.original <- df

      # carry this out for node positioning
      self$function.params$center.align.to.origin <- self$center.align.to.origin

      # Copy of the original data, this is used for all
      # further operations. Unlike `data.original`, this
      # is likely to be overwritten.
      self$points.raw <- data.table::copy(private$data.original)

      ###
      # Normalize the raw data using self$function.params$norm.by
      ###
      self$line.weights <- self$function.params$norm.by(self$points.raw)

      ###
      # Convert the string vector of code names to their corresponding
      # co-occurrence names and set as colnames for the self$line.weights
      ###
      codeNames_tri <- svector_to_ut(self$enadata$codes)

      colnames(self$line.weights) <- codeNames_tri
      # set the rownames to that of the original ENAdata file object
      rownames(self$line.weights) <- rownames(df)

      attr(self$line.weights, opts$UNIT_NAMES) <- attr(df, opts$UNIT_NAMES) #df[, .SD, with=T, .SDcols=self$enadata$get("unitsBy")];

      ###
      # Center the normed data
      # FIX - store as $data$centered
      ###
      #### ISSUE
      if (self$center.align.to.origin) {
        # only centers non-zero networks; all-zero rows are left as they are
        self$points.normed.centered <- self$line.weights

        non_zero_rows <- rowSums(as.matrix(self$line.weights)) != 0
        self$points.normed.centered[non_zero_rows, ] <- center_data_c(self$line.weights[non_zero_rows, ])
      }
      else {
        self$points.normed.centered <- center_data_c(self$line.weights)
      }
      colnames(self$points.normed.centered) <- codeNames_tri
      rownames(self$points.normed.centered) <- rownames(df)
      attr(self$points.normed.centered, opts$UNIT_NAMES) <- attr(self$enadata$adjacency.vectors.raw, opts$UNIT_NAMES)

      ###
      # Generate and Assign the rotation set
      ###
      if (is.function(self$function.params$rotation.by) && is.null(self$function.params$rotation.set)) {
        self$rotation.set <- do.call(self$function.params$rotation.by, list(self, self$function.params$rotation.params))
      }
      else if (!is.null(self$function.params$rotation.set)) {
        if (inherits(self$function.params$rotation.set, "ENARotationSet")) {
          print("Using custom rotation.set.")

          self$rotation.set <- self$function.params$rotation.set
        } else {
          stop("Supplied rotation.set is not an instance of ENARotationSet")
        }
      }
      else {
        stop("Unable to find or create a rotation set")
      }

      ###
      # Generate the rotated points
      ###
      self$points.rotated <- self$points.normed.centered %*% self$rotation.set$rotation
      # cannot keep more dimensions than the rotation produced
      private$dimensions <- min(private$dimensions, ncol(self$points.rotated))
      attr(self$points.rotated, opts$UNIT_NAMES) <- attr(self$points.normed.centered, opts$UNIT_NAMES)

      ###
      # Calculate node positions
      #   - The supplied method is expected to return a list
      #     with two keys, "node.positions" and "centroids"
      ###
      if (!is.null(self$rotation.set) && is.null(self$function.params$rotation.set)) {
        positions <- self$function.params$node.position.method(self)
        if (all(names(positions) %in% c("node.positions", "centroids"))) {
          self$node.positions <- positions$node.positions
          self$centroids <- positions$centroids

          self$rotation.set$node.positions <- positions$node.positions
        }
        else {
          stop(paste(
            "The node position method didn't return back the expected objects:",
            "\tExpected: c('node.positions','centroids')",
            # collapse so several unexpected names still give one message
            paste0("\tReceived: ", paste(names(positions), collapse = ", ")),
            sep = "\n"
          ))
        }
      }
      else {
        # a rotation set was supplied: reuse its node positions
        if (!is.null(self$function.params$rotation.set) && !is.null(self$function.params$rotation.set$node.positions)) {
          self$node.positions <- self$function.params$rotation.set$node.positions
        }
        else {
          stop("Unable to determine the node positions either by calculating
                them using `node.position.method` or using a supplied
                `rotation.set`")
        }
      }

      ###
      # Variance: share of the total variance carried by each rotated dimension
      ###
      variance.of.rotated.data <- var(self$points.rotated)
      diagonal.of.variance.of.rotated.data <- as.vector(diag(variance.of.rotated.data))
      self$variance <- diagonal.of.variance.of.rotated.data / sum(diagonal.of.variance.of.rotated.data)

      return(self)
    }
  )
)

1		### centering for projection
2
# Centers `lws` column-wise with the center vector stored on `rotation`.
#
# lws:      matrix-like object, one row per unit
# rotation: object exposing `$center.vec`, one value per column of `lws`
#
# Returns `lws` with `rotation$center.vec` subtracted from every row.
# Stops when `rotation` is NULL or carries no center vector.
center.projection <- function(lws, rotation) {
  if (is.null(rotation) || is.null(rotation$center.vec)) {
    stop("Supplied value for `rotation` does not have a center vector")
  }
  center_vec <- rotation$center.vec

  # Transposing puts the columns of `lws` along the rows, so recycling the
  # center vector down each column subtracts it from every original row.
  centered <- t(lws) - center_vec

  t(centered)
}
13
14		# og_lws = as.matrix(set.new$line.weights)
15		# set.new$rotation$center.vec = colMeans(og_lws)
16		#
17		#
18		#
19		# test = center.projection(lws = og_lws,set.new)
20		# centered_og = rENA:::center_data_c(as.matrix(set.new$line.weights))
21		# #
22		# View(test == centered_og) ### DIFFERENCE IN ROUNDING
23		# View(round(test,3) == round(centered_og,3))

1		#' Compute Between-Group Scatter Matrix
2		#'
3		#' This function calculates the between-group scatter matrix (\code{SB}) for a given numeric matrix and grouping variable.
4		#'
5		#' @param A A numeric matrix of dimensions \code{m x n}, where rows represent observations and columns represent features.
6		#' @param g A grouping variable of length \code{m}, either a factor or a character vector, indicating group membership for each observation.
7		#'
8		#' @return A numeric matrix representing the between-group scatter matrix (\code{SB}).
9		#'
10		#' @details
11		#' The function computes the total mean of the matrix \code{A} and the mean for each group defined by \code{g}.
12		#' It then calculates the between-group scatter matrix by summing the outer product of the mean differences, weighted by the group sizes.
13		#'
14		#' @examples
15		#' # Example usage:
16		#' A <- matrix(rnorm(20), nrow = 5, ncol = 4)
17		#' g <- factor(c("A", "B", "A", "B", "A"))
18		#' SB <- rENA:::compute_SB(A, g)
compute_SB <- function(A, g) {
  if (!is.matrix(A) || !is.numeric(A)) stop("A must be a numeric matrix.")
  if (length(g) != nrow(A)) stop("g must have the same length as number of rows in A.")

  # Drop unused levels: an empty group has a NaN mean, and 0 * NaN would
  # turn the whole scatter matrix into NaN.
  g <- droplevels(as.factor(g))

  # Total mean
  mu_total <- colMeans(A)

  SB <- matrix(0, ncol(A), ncol(A))

  for (grp in levels(g)) {
    A_grp <- A[which(g == grp), , drop = FALSE]

    # Between-group component: group size times the outer product of the
    # (group mean - total mean) difference with itself.
    mean_diff <- unname(colMeans(A_grp) - mu_total)
    SB <- SB + nrow(A_grp) * tcrossprod(mean_diff)
  }

  SB
}
47
48
49		#' Generalized Means Rotation (GMR) with optional subsetting and interaction control
50		#'
51		#' Computes a rotation (direction) `r` representing the contribution of the
52		#' first column of `X` to the multivariate ENA matrix `V`. Supports optional
53		#' subsetting by `groups`, optional inclusion of interaction terms when
54		#' computing adjusted contributions.
55		#'
56		#' @param V Numeric ENA matrix (units × connections) ready for rotation.
57		#' @param X Data frame or matrix of predictors; the first column is the target.
58		#' @param groups Optional vector specifying target groups to subset. If `NULL`
59		#' (default), all rows are used.
60		#' @param alpha Elastic-net mixing parameter forwarded to `get_x1_main_effect`
61		#' (default `1` — Lasso).
62		#' @param lambda Lambda selection for `cv.glmnet` forwarded to
63		#' `get_x1_main_effect` (default `"lambda.min"`).
64		#' @param interactions Logical; if `TRUE` (default) interactions are included when computing the adjusted contribution.
65		#' @param verbose Logical; if `TRUE` (default) the function emits messages about
66		#' fails or successes.
67		#'
68		#' @return A numeric vector `r` (length = ncol(V)) giving the normalized
69		#' rotation direction. Attributes attached:
70		#' \describe{
71		#' \item{`target`}{The full-length target vector (un-subsetted).}
72		#' \item{`Vx1`}{The unadjusted fitted values (`lm(V ~ target)`) embedded in
73		#' a full-length matrix (rows outside subset filled with zeros).}
74		#' }
75		#' If no valid direction can be found (including SVD failure), returns `NULL`
76		#' and issues a warning.
77		#'
78		#' @examples
79		#' \dontrun{
80		#' set.seed(1)
81		#' V <- matrix(rnorm(200), nrow = 40)
82		#' X <- data.frame(group = rep(letters[1:4], each = 10),
83		#' x2 = rnorm(40), x3 = rnorm(40))
84		#' r_all <- gmr(V, X)
85		#' r_subset <- gmr(V, X, groups = c("a", "b"), interactions = TRUE)
86		#' }
87		#'
88		#' @seealso [get_x1_main_effect()]
89		#' @importFrom stats lm model.matrix
90		#' @importFrom glmnet cv.glmnet
91		#' @export
92
gmr <- function(V, X, groups = NULL, alpha = 1, lambda = "lambda.min",
                interactions = TRUE, verbose = TRUE) {
  # prepare a function for almost zero check
  is_zero <- function(x, tol = 1e-12) all(abs(x) < tol)

  # get full target variable, namely, the first variable in X.
  # `[[1]]` on a matrix would return a single element, so matrices are
  # indexed by column instead.
  target_full <- if (is.matrix(X)) X[, 1] else X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # --- Fail if target is constant ---
  if (length(unique(target_full)) == 1) {
    warning("Target variable is constant; returning NULL.")
    return(NULL)
  }

  # --- Subset by groups if selected groups are provided ---
  if (!is.null(groups)) {
    valid_groups <- intersect(groups, unique(target_full))
    if (length(valid_groups) <= 1) {
      warning("Less than 2 valid groups selected; returning NULL.")
      return(NULL)
    }
    subset_idx <- which(target_full %in% valid_groups)
    V_sub <- V[subset_idx, , drop = FALSE]
    X_sub <- X[subset_idx, , drop = FALSE]
    target_sub <- target_full[subset_idx]
  } else { # use full data if no groups are selected
    subset_idx <- NULL
    V_sub <- V
    X_sub <- X
    target_sub <- target_full
  }

  # --- Base regression model ---
  base_model <- lm(V_sub ~ target_sub)
  Vx1_sub <- base_model$fitted.values

  # --- Compute contributions via Lasso (if covariates exist) ---
  if (ncol(X_sub) == 1) { # no covariates, use base model
    Vx_sub <- Vx1_sub
  } else { # covariates exist, use Lasso model
    Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                 lambda = lambda, include_interactions = interactions)
  }
  if (is_zero(Vx_sub)) {
    warning("Regression resulted in zero contribution; returning NULL.")
    return(NULL)
  }

  # --- Compute rotation direction r ---
  r <- NULL
  if (is.numeric(target_sub)) {
    if (verbose) message("Computing direction for numeric target...")
    # slope of the contribution on the target, one value per column of V
    direction_model <- lm(Vx_sub ~ target_sub)
    beta <- direction_model$coefficients[2, ]
    if (is_zero(beta)) {
      warning("Numerical target with zero beta; returning NULL.")
      return(NULL)
    }
    r <- beta
  } else {
    if (verbose) message("Computing direction for categorical target...")
    # leading right singular vector of the between-group scatter matrix
    sb <- compute_SB(Vx_sub, target_sub)
    r <- tryCatch(svd(sb)$v[, 1], error = function(e) NULL)
  }

  # --- Give up if r is NULL or zero ---
  if (is.null(r) || all(r == 0)) {
    warning("Unable to compute any valid direction; returning NULL.")
    return(NULL)
  }

  # --- Normalize ---
  r <- r / sqrt(sum(r^2))

  # --- Build full-length Vx1 (rows outside the subset stay zero) ---
  # NOTE(review): this stores `Vx_sub`, i.e. the covariate-adjusted
  # contribution whenever covariates exist -- confirm that is what the
  # `Vx1` attribute is meant to hold.
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  Vx1_full[filled_rows, ] <- Vx_sub
  colnames(Vx1_full) <- colnames(V)

  # --- Attach metadata ---
  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  #attr(r, "fallback_stage") <- fallback_stage

  if (verbose) message(" gmr completed successfully ")
  return(r)
}
184		# the fallback mechanism is created but not used.
# Variant of `gmr` that never gives up early: each failing stage falls back
# to a simpler one and the stage that produced the result is recorded in the
# "fallback_stage" attribute of the returned direction.
#
# Arguments are the same as for `gmr`. Returns the normalized direction `r`
# with attributes "target", "Vx1" and "fallback_stage", or NULL (with a
# warning) when no direction can be computed at all.
gmr_with_fallbacks <- function(V, X, groups = NULL, alpha = 1, lambda = "lambda.min",
                               interactions = TRUE, verbose = TRUE) {
  # prepare a function for almost zero check
  is_zero <- function(x, tol = 1e-12) all(abs(x) < tol)

  # get full target variable; `[[1]]` on a matrix would return a single
  # element, so matrices are indexed by column instead
  target_full <- if (is.matrix(X)) X[, 1] else X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # --- Early SVD fallback if target is constant ---
  if (length(unique(target_full)) == 1) {
    if (verbose) message("Target variable is constant; falling back to SVD(V)")
    r <- tryCatch(svd(V)$v[, 1], error = function(e) NULL)

    if (is.null(r)) {
      warning("Unable to compute any valid direction; returning NULL.")
      return(NULL)
    }

    r <- r / sqrt(sum(r^2))
    # no target contribution exists in this case, so Vx1 is all zeros
    Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
    colnames(Vx1_full) <- colnames(V)

    attr(r, "target") <- target_full
    attr(r, "Vx1") <- Vx1_full
    attr(r, "fallback_stage") <- "constant target SVD"
    return(r)
  }

  # --- Subset by groups if provided ---
  subset_idx <- NULL
  V_sub <- V
  X_sub <- X
  target_sub <- target_full
  if (!is.null(groups)) {
    valid_groups <- intersect(groups, unique(target_full))
    if (length(valid_groups) > 1) {
      subset_idx <- which(target_full %in% valid_groups)
      V_sub <- V[subset_idx, , drop = FALSE]
      X_sub <- X[subset_idx, , drop = FALSE]
      target_sub <- target_full[subset_idx]
    } else if (verbose) {
      message("Less than 2 valid groups selected; using all rows instead")
    }
  }

  # --- Base regression model ---
  base_model <- lm(V_sub ~ target_sub)
  Vx1_sub <- base_model$fitted.values

  # --- Compute contributions via Lasso (if covariates exist) ---
  if (ncol(X_sub) == 1) {
    Vx_sub <- Vx1_sub
    fallback_stage <- "no covariates"
  } else {
    Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                 lambda = lambda, include_interactions = interactions)
    if (is_zero(Vx_sub)) {
      if (verbose) message("⚠️ Lasso with interactions gave zero contribution; trying without interactions.")
      Vx_sub <- get_x1_main_effect(V_sub, X_sub, alpha = alpha,
                                   lambda = lambda, include_interactions = FALSE)
      fallback_stage <- "no interactions"
    } else {
      fallback_stage <- if (interactions) "with interactions" else "no interactions"
    }

    if (is_zero(Vx_sub)) {
      if (verbose) message("⚠️ Lasso without interactions gave zero contribution; falling back to simple model.")
      Vx_sub <- Vx1_sub
      fallback_stage <- "no covariates"
    }
  }

  # --- Compute rotation direction r ---
  if (is.numeric(target_sub)) {
    if (verbose) message("Computing direction for numeric target...")
    direction_model <- lm(Vx_sub ~ target_sub)
    beta <- direction_model$coefficients[2, ]
    if (is_zero(beta)) {
      if (verbose) message("⚠️ Beta is zero; falling back to SVD(V_sub).")
      # NOTE(review): the message names V_sub but the SVD is taken of
      # Vx_sub -- confirm which one is intended.
      r <- tryCatch(svd(Vx_sub)$v[, 1], error = function(e) NULL)
      fallback_stage <- "SVD fallback"
    } else {
      r <- beta / sqrt(sum(beta^2))
    }
  } else {
    if (verbose) message("Computing direction for categorical target...")
    sb <- compute_SB(Vx_sub, target_sub)
    r <- tryCatch(svd(sb)$v[, 1], error = function(e) NULL)
    fallback_stage <- "SVD of SB"
  }

  # --- Final SVD fallback if r is NULL or zero ---
  if (is.null(r) || all(r == 0)) {
    warning("⚠️ All levels failed; using SVD(V_sub)$v[,1] as final fallback.")
    r <- tryCatch(svd(V_sub)$v[, 1], error = function(e) NULL)
    fallback_stage <- "final SVD"
  }

  if (is.null(r)) {
    warning("❌ Unable to compute any valid direction; returning NULL.")
    return(NULL)
  }

  # --- Normalize ---
  r <- r / sqrt(sum(r^2))

  # --- Build full-length Vx1 (rows outside the subset stay zero) ---
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  Vx1_full[filled_rows, ] <- Vx_sub
  colnames(Vx1_full) <- colnames(V)

  # --- Attach metadata ---
  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  attr(r, "fallback_stage") <- fallback_stage

  if (verbose) message("✅ gmr completed successfully (", fallback_stage, ").")

  return(r)
}
316
317
# Earlier version of `gmr`, kept as a backup.
#
# V: matrix, ENA set points for projection
# X: data frame (or matrix) containing all predictor variables, first as target
#
# Returns the normalized direction `r` with attributes "target" (the target
# vector) and "Vx1" (fitted values of `lm(V ~ target)`).
gmr_bk1 <- function(V, X) {
  # first column is the target; `[[1]]` on a matrix would return a single
  # element, so matrices are indexed by column instead
  target <- if (is.matrix(X)) X[, 1] else X[[1]]
  if (is.list(target)) { # flatten if it's a list-column
    target <- unlist(target, recursive = FALSE)
  }
  target <- as.vector(target) # ensure atomic

  # main effect of X1 without adjustment
  model <- lm(V ~ target)
  Vx1 <- model$fitted.values

  # main effect of X1 adjusted for covariates
  if (ncol(X) == 1) { # simple linear model if there are no covariates
    Vx <- Vx1
  } else { # Lasso model adjusted for covariates
    Vx <- get_x1_main_effect(V, X)
  }

  if (is.numeric(target)) { # compute direction for numerical variable
    # Reuse the coefficients from the initial model instead of rebuilding
    print("target is numeric")
    beta <- coef(model)[2, ] # second coefficient row is the slope
    r <- beta / sqrt(sum(beta^2))
  } else {
    print("target is NOT numeric")
    sb <- compute_SB(Vx, target)
    r <- svd(sb)$v[, 1]
  }

  # project r to span of row vectors of V
  #model <- lm(r ~ t(V) + 0)
  #r<- Vx1 <- model$fitted.values;
  #r <- t(V) %% coef(lm(r ~ t(V) + 0)); # Projection: r ~ V^T %% beta
  #r <- r / sqrt(sum(r^2));
  attr(r, "target") <- target
  attr(r, "Vx1") <- Vx1 # target contribution
  return(r)
}
362
# Earlier version of `gmr` with group subsetting, kept as a backup.
#
# V:      matrix, ENA set points for projection
# X:      data frame (or matrix) of predictors, first column is the target
# groups: optional target values to subset on; when any of them is missing
#         from the target, a warning is raised and all rows are used
#
# Returns the normalized direction `r` with attributes "target" (full-length
# target) and "Vx1" (fitted values of `lm(V_sub ~ target_sub)` placed in a
# full-size matrix, zeros outside the subset).
gmr2_bk <- function(V, X, groups = NULL) {
  # `[[1]]` on a matrix would return a single element, so matrices are
  # indexed by column instead
  target_full <- if (is.matrix(X)) X[, 1] else X[[1]]
  if (is.list(target_full)) target_full <- unlist(target_full, recursive = FALSE)
  target_full <- as.vector(target_full)

  # default: use every row
  subset_idx <- NULL
  V_sub <- V
  X_sub <- X
  target_sub <- target_full
  if (!is.null(groups)) {
    if (all(groups %in% unique(target_full))) {
      subset_idx <- which(target_full %in% groups)
      V_sub <- V[subset_idx, , drop = FALSE]
      X_sub <- X[subset_idx, , drop = FALSE]
      target_sub <- target_full[subset_idx]
    } else {
      warning("Specified groups not found; using all data.")
    }
  }

  model <- lm(V_sub ~ target_sub)
  Vx1_sub <- model$fitted.values

  if (ncol(X_sub) == 1) {
    Vx_sub <- Vx1_sub
  } else {
    Vx_sub <- get_x1_main_effect(V_sub, X_sub)
  }

  if (is.numeric(target_sub)) {
    # slope row of the unadjusted model, normalized to unit length
    beta <- coef(model)[2, ]
    r <- beta / sqrt(sum(beta^2))
  } else {
    sb <- compute_SB(Vx_sub, target_sub)
    r <- svd(sb)$v[, 1]
  }

  # Build full Vx1: fill subset rows, zeros elsewhere
  filled_rows <- if (is.null(subset_idx)) seq_len(nrow(V)) else subset_idx
  Vx1_full <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  Vx1_full[filled_rows, ] <- Vx1_sub
  colnames(Vx1_full) <- colnames(V)

  attr(r, "target") <- target_full
  attr(r, "Vx1") <- Vx1_full
  return(r)
}
413
#' Extract the Main Effect of X on V
#'
#' Computes the main-effect contribution of the first column of `X` (the
#' "target") to the multivariate matrix `V`. The design matrix is built from
#' all columns of `X`; the target's main-effect columns are identified from
#' the design matrix itself and their fitted contribution is returned
#' (units x connections).
#'
#' @param V A numeric matrix (units x connections) of dependent variables.
#' @param X A data frame of predictors / covariates. The first column is
#'   treated as the target variable whose contribution will be extracted.
#' @param alpha Elastic-net mixing parameter passed to `cv.glmnet`. `alpha = 1`
#'   (default) is Lasso; `alpha = 0` is ridge. Only used on the penalized path.
#' @param lambda Passed as `s` to `coef()` on the `cv.glmnet` fit, e.g.
#'   `"lambda.min"` (default), `"lambda.1se"`, or a numeric value. Only used on
#'   the penalized path.
#' @param include_interactions Logical; if `TRUE` the design matrix is built
#'   with `~ .^2` (main effects plus pairwise interactions), otherwise with
#'   `~ .` (default). In both cases only the target's main-effect columns
#'   contribute to the returned matrix; interaction columns act as adjustment
#'   terms.
#'
#' @return A numeric matrix with one row per row of `X` and one column per
#'   response in `V`, containing the estimated contribution of `X[, 1]`. If
#'   no design column belongs to the target, a zero matrix is returned and a
#'   warning is issued.
#'
#' @details
#' When the number of design columns `p` satisfies `p <= nrow(X) - 10`, an
#' ordinary least squares fit (`lm(V ~ mm)`) is used and coefficients that
#' come back `NA` (rank deficiency) are treated as zero. Otherwise a
#' multivariate `cv.glmnet` (`family = "mgaussian"`) is fitted with a
#' `penalty.factor` of 0 on the target columns so they are never shrunk. If
#' `cv.glmnet` fails, a warning is issued and the OLS fit is used instead.
#'
#' @examples
#' \dontrun{
#' set.seed(1)
#' V <- matrix(rnorm(50), ncol = 5)
#' X <- data.frame(CONFIDENCE = rnorm(10), Condition = factor(rep(1:2, 5)))
#' # main effects only
#' Vx_main <- get_x1_main_effect(V, X, include_interactions = FALSE)
#' # include interactions
#' Vx_full <- get_x1_main_effect(V, X, include_interactions = TRUE, alpha = 0) # ridge
#' }
#'
#' @seealso [gmr2()] for the rotation routine that uses this function.
#' @importFrom stats lm model.matrix as.formula coef
#' @importFrom glmnet cv.glmnet
#' @export
get_x1_main_effect <- function(V, X, alpha = 1, lambda = "lambda.min", include_interactions = FALSE) {
  # 1. Formula & model matrix (intercept column dropped)
  formula_str <- if (include_interactions) "~ .^2" else "~ ."
  mm_full <- model.matrix(as.formula(formula_str), data = X)
  term_of_col <- attr(mm_full, "assign")[-1]
  mm <- mm_full[, -1, drop = FALSE]

  # 2. Main-effect columns of X[, 1]. The "assign" attribute maps every design
  #    column to its model term, and the first term is the first column of X.
  #    Unlike matching on column-name prefixes, this cannot pick up another
  #    variable whose name merely starts with the target's name.
  x1_cols <- which(term_of_col == 1L)

  if (length(x1_cols) == 0) {
    warning("No main effect columns found for X[,1]; returning zeros.")
    return(matrix(0, nrow = nrow(V), ncol = ncol(V), dimnames = list(NULL, colnames(V))))
  }
  mm_x1 <- mm[, x1_cols, drop = FALSE]

  # 3. Fitting logic
  p <- ncol(mm)
  # Heuristic: use OLS only if we have enough degrees of freedom
  use_ols <- (p <= (nrow(X) - 10))

  if (!use_ols) {
    # Target columns are left unpenalized so they are never shrunk to zero
    penalty_factors <- rep(1, p)
    penalty_factors[x1_cols] <- 0

    fit <- tryCatch(
      glmnet::cv.glmnet(
        x = mm, y = V, family = "mgaussian",
        alpha = alpha, penalty.factor = penalty_factors
      ),
      error = function(e) {
        warning(
          "cv.glmnet failed (", conditionMessage(e), "); falling back to OLS.",
          call. = FALSE
        )
        NULL
      }
    )

    if (!is.null(fit)) {
      x1_contribution <- matrix(
        0, nrow = nrow(V), ncol = ncol(V), dimnames = list(NULL, colnames(V))
      )
      # coef() returns one sparse coefficient matrix per response
      coefs_list <- coef(fit, s = lambda)
      for (i in seq_along(coefs_list)) {
        # Densify and drop the intercept row so rows align with columns of mm
        beta_all <- as.matrix(coefs_list[[i]])[-1, , drop = FALSE]
        x1_contribution[, i] <- mm_x1 %*% beta_all[x1_cols, , drop = FALSE]
      }
      return(x1_contribution)
    }
  }

  # OLS path (also the fallback when the penalized fit failed)
  fit_ols <- lm(V ~ mm)
  # as.matrix keeps two dimensions when there is a single response
  beta_ols <- as.matrix(coef(fit_ols))[-1, , drop = FALSE]
  beta_x1_ols <- beta_ols[x1_cols, , drop = FALSE]

  # OLS reports NA for aliased (rank-deficient) columns; treat as no effect
  beta_x1_ols[is.na(beta_x1_ols)] <- 0

  mm_x1 %*% beta_x1_ols
}
530

##
#' @title Plot points on an ENAplot
#'
#' @description Plot all or a subset of the points of an ENAplot using the plotly plotting library
#'
#' @export
#'
#' @param enaplot \code{\link{ENAplot}} object to use for plotting
#' @param points A data.frame, data.table or matrix whose first two (non-metadata) columns are the X and Y coordinates of the points to plot; a plain numeric vector is treated as a single point; default: NULL, which plots \code{enaplot$enaset$points}
#' @param point.size Numeric marker size; a single value is used for all points, otherwise one value per point; default: enaplot$point$size
#' @param labels A character vector of point labels, length nrow(points); default: NULL (the legend is then hidden)
#' @param confidence.interval A character determining markings to use for confidence intervals, choices: none, crosshairs, box, default: none
#' @param outlier.interval A character determining markings to use for outlier interval, choices: none, crosshairs, box, default: none
#' @param confidence.interval.values A matrix/dataframe where columns are CI x and y values for each point
#' @param outlier.interval.values A matrix/dataframe where columns are OI x and y values for each point
#' @param shape A character which determines the shape of point markers, choices: square, triangle-up, diamond, circle, default: circle
#' @param colors A character vector of the point marker colors; if one given it is used for all, otherwise must be same length as points; default: NULL
#' @param label.offset character: top left (default), top center, top right, middle left, middle center, middle right, bottom left, bottom center, bottom right
#' @param label.group A string intended to group the labels in the legend; currently not used by this function
#' @param label.font.size An integer which determines the font size for point labels, default: enaplot$get("font.size")
#' @param label.font.color A character which determines the color of label font, default: enaplot$get("font.color")
#' @param label.font.family A character which determines label font type, choices: Arial, Courier New, Times New Roman, default: enaplot$get("font.family")
#' @param show.legend Logical indicating whether to show the point labels in the legend; forced to FALSE when \code{labels} is NULL
#' @param legend.name Character used as the plotly legend group of the plotted points
#' @param texts A vector of text annotations, one per point, drawn next to each marker; default: NULL
#' @param ... additional parameters (currently unused)
#'
#'
#' @seealso \code{\link{ena.plot}}, \code{\link{ENAplot}}, \code{\link{ena.plot.group}}
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#'   'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
#'
#' accum = ena.accumulate.data(
#'   units = RS.data[,c("UserName","Condition")],
#'   conversation = RS.data[,c("Condition","GroupName")],
#'   metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
#'   codes = RS.data[,codeNames],
#'   window.size.back = 4
#' )
#'
#' set = ena.make.set(
#'   enadata = accum,
#'   rotation.by = ena.rotate.by.mean,
#'   rotation.params = list(
#'       accum$meta.data$Condition=="FirstGame",
#'       accum$meta.data$Condition=="SecondGame"
#'   )
#' )
#'
#' plot = ena.plot(set)
#'
#' group1.points = set$points[set$meta.data$Condition == "FirstGame",]
#' group2.points = set$points[set$meta.data$Condition == "SecondGame",]
#' plot = ena.plot.points(plot, points = group1.points);
#' plot = ena.plot.points(plot, points = group2.points);
#' print(plot);
#'
#' @return \code{\link{ENAplot}} The ENAplot provided to the function, with its plot updated to include the new points.
##
ena.plot.points <- function(
  enaplot,

  points = NULL,
  point.size = enaplot$point$size,
  labels = NULL,
  label.offset = "top left",
  label.group = NULL,

  label.font.size = NULL,
  label.font.color = NULL,
  label.font.family = NULL,

  shape = "circle",
  colors = NULL,

  confidence.interval.values = NULL,
  confidence.interval = c("none", "crosshairs", "box"),

  outlier.interval.values = NULL,
  outlier.interval = c("none", "crosshairs", "box"),
  show.legend = TRUE,
  legend.name = "Points",
  texts = NULL,
  ...
) {
  ###
  # Parameter Checking and Cleaning
  ###
  # Unset label font settings fall back to the plot-wide settings
  if (is.null(label.font.size)) {
    label.font.size <- enaplot$get("font.size")
  }
  if (is.null(label.font.color)) {
    label.font.color <- enaplot$get("font.color")
  }
  if (is.null(label.font.family)) {
    label.font.family <- enaplot$get("font.family")
  }

  if (is.null(points)) {
    points <- enaplot$enaset$points
  }

  if (is(points, "numeric")) {
    # A bare numeric vector is a single point: reshape to one row
    points <- matrix(points)
    dim(points) <- c(1, nrow(points))
    points.layout <- data.table::data.table(points)
  } else if (data.table::is.data.table(points)) {
    # Copy so the caller's table is never modified by reference
    points.layout <- data.table::copy(points)
  } else {
    points.layout <- data.table::data.table(points)
  }
  n.points <- nrow(points.layout)

  if (!is.character(label.font.family)) {
    label.font.family <- enaplot$get("font.family")
  }

  confidence.interval <- match.arg(confidence.interval)
  outlier.interval <- match.arg(outlier.interval)

  valid.shapes <- c("circle", "square", "triangle-up", "diamond")
  if (!all(shape %in% valid.shapes)) {
    stop(sprintf(
      "Unrecognized shapes: %s",
      paste(unique(shape[!(shape %in% valid.shapes)]), collapse = ", ")
    ))
  }
  if (length(shape) == 1) {
    shape <- rep(shape, n.points)
  }

  valid.label.offsets <- c(
    "top left", "top center", "top right",
    "middle left", "middle center", "middle right",
    "bottom left", "bottom center", "bottom right"
  )
  if (!all(label.offset %in% valid.label.offsets)) {
    stop(sprintf(
      "Unrecognized label.offsets: %s",
      paste(unique(label.offset[!(label.offset %in% valid.label.offsets)]), collapse = ", ")
    ))
  }
  if (length(label.offset) == 1) {
    label.offset <- rep(label.offset, n.points)
  }

  # Only one interval can be drawn as crosshairs (plotly error bars)
  if (grepl("^c", confidence.interval) && grepl("^c", outlier.interval)) {
    message("Confidence Interval and Outlier Interval cannot both be crosshair")
    message("Plotting Outlier Interval as box")
    outlier.interval <- "box"
  }

  if (length(colors) == 1) {
    colors <- rep(colors, n.points)
  }
  if (length(point.size) == 1) {
    point.size <- rep(point.size, n.points)
  }
  if (is.null(labels)) {
    show.legend <- FALSE
  }
  ###
  # END: Parameter Checking and Cleaning
  ###

  ###
  # Set error value for CI|OI crosshair on plot
  ###
  error <- list(
    x = list(visible = TRUE, type = "data"),
    y = list(visible = TRUE, type = "data")
  )
  int.values <- NULL
  if (grepl("^c", confidence.interval) && !is.null(confidence.interval.values)) {
    int.values <- confidence.interval.values
  } else if (grepl("^c", outlier.interval) && !is.null(outlier.interval.values)) {
    int.values <- outlier.interval.values
  }
  # Indexing NULL yields NULL, so `array` is simply left unset when no
  # crosshair values apply
  error$x$array <- int.values[, 1]
  error$y$array <- int.values[, 2]
  ###
  # END: Set error value for crosshair on plot
  ###

  ###
  # Set box value for CI|OI box on plot
  ###
  # When both intervals are boxes, the outlier interval takes precedence
  box.values <- NULL
  if (grepl("^b", confidence.interval) && !is.null(confidence.interval.values)) {
    box.values <- confidence.interval.values
    box.label <- "Conf. Int."
  }
  if (grepl("^b", outlier.interval) && !is.null(outlier.interval.values)) {
    box.values <- outlier.interval.values
    box.label <- "Outlier Int."
  }
  ###
  # END: Set box value for CI|OI box on plot
  ###

  ###
  # Plot
  ###
  points.matrix <- remove_meta_data(points.layout)
  colnames(points.matrix) <- paste0("X", seq_len(ncol(points.matrix)))
  # Guard the empty case: max() of nothing would warn
  this.max <- if (nrow(points.matrix) > 0) max(points.matrix) else -Inf

  # One trace per point so each can carry its own marker, name and text.
  # seq_len() makes an empty point set add no traces.
  for (m in seq_len(nrow(points.matrix))) {
    enaplot$plot <- plotly::add_trace(
      p = enaplot$plot,
      data = points.matrix[m, ],
      type = "scatter",
      x = ~X1, y = ~X2,
      mode = "markers+text",
      marker = list(
        symbol = shape[m],
        color = colors[m],
        size = point.size[m]
      ),
      error_x = error$x, error_y = error$y,
      showlegend = show.legend,
      name = labels[m],
      text = texts[m],
      textfont = list(
        family = label.font.family,
        size = label.font.size,
        color = label.font.color
      ),
      legendgroup = legend.name,
      textposition = label.offset[m],
      hoverinfo = "x+y+name"
    )
  }

  if (!is.null(box.values)) {
    # Closed rectangle through the four corners given by rows 1 and 2
    boxv <- data.frame(
      X1 = c(box.values[1, 1], box.values[2, 1], box.values[2, 1], box.values[1, 1], box.values[1, 1]),
      X2 = c(box.values[1, 2], box.values[1, 2], box.values[2, 2], box.values[2, 2], box.values[1, 2])
    )
    this.max <- max(boxv, this.max)
    enaplot$plot <- plotly::add_trace(
      p = enaplot$plot,
      data = boxv,
      type = "scatter",
      x = ~X1, y = ~X2,
      mode = "lines",
      line = list(
        width = 1,
        color = colors[1],
        dash = "dash"
      ),
      showlegend = show.legend,
      name = box.label
    )
  }

  # Grow the (symmetric) axes when the new content would not fit with margin
  if (this.max * 1.2 > max(enaplot$axes$y$range)) {
    this.max <- this.max * 1.2
    enaplot$axes$x$range <- c(-this.max, this.max)
    enaplot$axes$y$range <- c(-this.max, this.max)
    enaplot$plot <- plotly::layout(
      enaplot$plot,
      xaxis = enaplot$axes$x,
      yaxis = enaplot$axes$y
    )
  }
  ###
  # END: Plot
  ###

  return(enaplot)
}
261

###
#' @title ENA Rotate by regression
#'
#' @description This function allows user to provide a regression formula for rotation on x and optionally on y.
#' If a regression formula for y is not provided, svd is applied to the residual data deflated by x to get y coordinates.
#' The regression formula uses the ENA dimensions as dependent variables.
#' The first predictor has to be two-group categorical, binary, or numerical.
#'
#' @param enaset An \code{\link{ENAset}}
#' @param params list of parameters, may include:
#'    x_var: (required) regression formula for the x direction, anything accepted by
#'      \code{formula()}, such as \code{V ~ Condition + GameHalf + Condition:GameHalf},
#'      where V always stands for the ENA points. The coefficients of the first predictor
#'      (second coefficient row) define the direction.
#'    y_var: regression formula, similar to x_var, for the y direction (optional); it is
#'      fitted with V set to the points deflated by the x direction.
#'    points: matrix of points to rotate (optional); defaults to
#'      \code{enaset$points.normed.centered}, or \code{enaset$model$points.for.projection}
#'      when that is NULL.
#'    fullNames: logical (optional, default FALSE) selecting an alternative way of naming
#'      the regression dimensions; a warning is raised when it is used.
#'
#' @export
#' @return \code{\link{ENARotationSet}}
ena.rotate.by.hena.regression <- function(enaset, params) {
  # check arguments
  if (!is.list(params) || is.null(params$x_var)) {
    stop("params must be provided as a list() and provide `x_var`")
  }

  x <- params$x_var
  y <- params$y_var
  points <- params$points
  fullNames <- params$fullNames
  if (is.null(fullNames)) {
    fullNames <- FALSE
  }

  # get points
  if (!is.null(points)) {
    p <- points
  } else if (is.null(enaset$points.normed.centered)) {
    p <- as.matrix(enaset$model$points.for.projection)
  } else {
    p <- as.matrix(enaset$points.normed.centered)
  }

  # Scale to unit length; an all-zero direction is left untouched
  to_unit <- function(v) {
    len <- sqrt(sum(v * v))
    if (len != 0) {
      v <- v / len
    }
    v
  }

  # Name for a regression dimension: the second variable of the formula when
  # available, otherwise the first coefficient name
  dimension_name <- function(spec, direction) {
    if (is.na(all.vars(spec)[2])) {
      return(names(direction)[1])
    }
    if (fullNames) {
      warning("FullName param is likely wrong.")
      return(parse(text = spec)[[1]][["formula"]][[3]])
    }
    all.vars(spec)[2]
  }

  # regress to get v1 using x
  # NOTE(review): the user formula refers to `V` by name; presumably
  # with.ena.matrix evaluates the call so that this local is visible - confirm.
  V <- p
  v1_res <- with.ena.matrix(enaset$model$points.for.projection, {
    lm(formula(params$x_var))
  })
  v1 <- to_unit(v1_res$coefficients[2, ])
  xName <- dimension_name(x, v1)

  # Save v1
  R <- matrix(c(v1), ncol = 1)
  colnames(R) <- c(paste0(xName, "_reg"))

  # deflate matrix by x dimension: remove the projection onto v1
  # (matrix products; v1 %*% t(v1) is the rank-one projector)
  A <- as.matrix(p)
  defA <- A - A %*% v1 %*% t(v1)

  # if y formula is given, regress by y formula
  if (!is.null(y)) {
    # regress to get v2 vector using formula y on the deflated points
    V <- defA
    v2_res <- with.ena.matrix(enaset$model$points.for.projection, {
      lm(formula(params$y_var))
    })
    v2 <- to_unit(v2_res$coefficients[2, ])
    yName <- dimension_name(y, v2)

    # save both v1 and v2
    R <- cbind(v1, v2)
    colnames(R) <- c(paste0(xName, "_reg"), paste0(yName, "_reg"))

    # deflate by v2
    defA <- as.matrix(defA) - as.matrix(defA) %*% v2 %*% t(v2)
  }

  # get svd for deflated points
  svd_result <- prcomp(defA, retx = FALSE, scale = FALSE, center = FALSE, tol = 0)
  svd_v <- svd_result$rotation

  # Merge rotation vectors: regression dimensions first, then as many leading
  # SVD dimensions as fit. seq_len() keeps this correct when none fit.
  vcount <- ncol(R)
  colNamesR <- colnames(R)
  n_svd <- max(ncol(svd_v) - vcount, 0L)
  combined <- cbind(R, svd_v[, seq_len(n_svd), drop = FALSE])
  svd_names <- if (n_svd > 0) paste0("SVD", vcount + seq_len(n_svd)) else character(0)
  colnames(combined) <- c(colNamesR, svd_names)

  # create rotation set
  rotation_set <- ENARotationSet$new(
    node.positions = NULL,
    rotation = combined,
    codes = enaset$rotation$codes,
    eigenvalues = NULL
  )

  return(rotation_set)
}

#' Correlation between distances in two ENA spaces
#'
#' @description
#' Calculates the Pearson correlation between the pairwise Euclidean distances
#' of points in two ENA spaces (\code{A} and \code{B}). For smaller datasets,
#' it computes the exact correlation. For larger datasets, it estimates the
#' correlation using a sampled subset of pairs.
#'
#' @param A A matrix or data frame representing the first ENA space (rows as points).
#' @param B A matrix or data frame representing the second ENA space (must have the same number of rows as A).
#' @param max_sample_size Numeric. The maximum number of pairwise distances to compute.
#'   If the total possible pairs exceeds this value, that many index pairs are
#'   drawn at random with replacement (pairs of a point with itself are
#'   discarded, so slightly fewer may be used). Default is 100,000.
#'
#' @return A numeric value representing the Pearson correlation.
#' @importFrom stats dist cor
#' @export
ena_space_dist_corr <- function(A, B, max_sample_size = 100000) {
  m <- nrow(A)
  m_b <- nrow(B)

  # nrow() is NULL for dimensionless input; test that first so the
  # comparisons below never see a zero-length value
  if (is.null(m) || is.null(m_b) || m == 0 || m_b != m) {
    stop("The spaces must have the same non-zero number of rows.")
  }

  # Total unique pairs m(m-1)/2
  total_possible_pairs <- choose(m, 2)

  if (total_possible_pairs <= max_sample_size) {
    # CASE 1: few enough pairs - exact calculation over all of them
    d_A <- as.vector(dist(A))
    d_B <- as.vector(dist(B))
    return(cor(d_A, d_B, method = "pearson"))
  }

  # CASE 2: too many pairs - sample index pairs with replacement
  idx1 <- sample.int(m, max_sample_size, replace = TRUE)
  idx2 <- sample.int(m, max_sample_size, replace = TRUE)

  # Drop pairs of a point with itself (distance is trivially zero)
  keep <- idx1 != idx2
  idx1 <- idx1[keep]
  idx2 <- idx2[keep]

  # Vectorized Euclidean distance: sqrt(sum((x - y)^2)) per sampled pair
  dist_A <- sqrt(rowSums((A[idx1, , drop = FALSE] - A[idx2, , drop = FALSE])^2))
  dist_B <- sqrt(rowSums((B[idx1, , drop = FALSE] - B[idx2, , drop = FALSE])^2))

  cor(dist_A, dist_B, method = "pearson")
}

#' Peak Ground Diversity Window
#'
#' Convenience wrapper around \code{\link{ena.ground.diversity}} that returns
#' only the window size at which ground diversity (entropy) peaks.
#'
#' @param data A data.frame or data.table containing the conversation data.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return The \code{peak_window} value of the peaks table produced by
#'   \code{ena.ground.diversity}.
#' @export
#'
#' @examples
#' \dontrun{
#' peak_w <- ena.gd.window(data = my_data,
#'                         codeNames = c("CodeA", "CodeB"),
#'                         conversation_cols = "ConversationID",
#'                         max_window = 15)
#' }
ena.gd.window <- function(data, codeNames, conversation_cols, max_window = 20) {
  diversity <- ena.ground.diversity(data, codeNames, conversation_cols, max_window)
  peak_table <- diversity$peaks
  peak_table$peak_window
}
25
#' Ground Diversity (Entropy) Curves and Peaks
#'
#' Counts the "ground types" (combinations of active codes) seen under each
#' sliding window size from 1 to \code{max_window}, then summarises them as
#' normalized entropy curves using the per-conversation mean.
#'
#' @param data A data.frame or data.table containing the conversation data.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return The list returned by \code{ground.entropy.curves}, with elements:
#' \describe{
#'   \item{curves}{A data.table containing aggregated entropy curves across windows.}
#'   \item{peaks}{A data.table with the window size where entropy peaked.}
#'   \item{conversation_curves}{A data.table of entropy per individual conversation per window.}
#' }
#' @export
#'
#' @examples
#' \dontrun{
#' gd_results <- ena.ground.diversity(data = my_data,
#'                                    codeNames = c("CodeA", "CodeB"),
#'                                    conversation_cols = "ConversationID")
#' }
ena.ground.diversity <- function(data, codeNames, conversation_cols, max_window = 20) {
  # Step 1: frequency of every ground type per conversation and window size
  type_counts <- ground.type.counts(
    data = data,
    codeNames = codeNames,
    conversation_cols = conversation_cols,
    max_window = max_window
  )

  # Step 2: entropy curves, normalized by the number of codes
  entropy_summary <- ground.entropy.curves(
    counts = type_counts,
    conversation_cols = conversation_cols,
    num_codes = length(codeNames),
    methods = "mean",
    normalize = TRUE
  )
  entropy_summary
}
69
#' Calculate Entropy Curves from Ground Type Counts
#'
#' Internal helper function that takes ground type frequencies and computes Shannon entropy
#' across different aggregation methods (mean, weighted, or pooled) and window sizes.
#'
#' @param counts A data.table generated by \code{\link{ground.type.counts}}; must contain
#'   the columns \code{count}, \code{n_obs}, \code{window} and \code{ground_type}.
#' @param conversation_cols A character vector of column names defining unique conversations.
#' @param num_codes An integer representing the total number of unique codes (used for normalization base \eqn{2^C}).
#' @param group_cols A character vector of columns to group by (e.g., experimental conditions).
#'   Default is NULL, in which case a single group column \code{.group} with value "all" is added.
#' @param methods A character vector specifying which aggregation methods to calculate.
#'   Options include "mean" (average of per-conversation entropies), "weighted"
#'   (same, weighted by \code{n_obs}) and "pooled" (entropy of counts summed over
#'   conversations). Default is all three. Unknown methods raise an error.
#' @param normalize Logical; if TRUE (default), entropy is divided by \eqn{\log(2^C)}.
#'
#' @return A list containing:
#' \describe{
#'   \item{curves}{Aggregated entropy metrics across windows and methods.}
#'   \item{peaks}{The window size with the highest entropy for each group and method.}
#'   \item{conversation_curves}{Entropy per individual conversation and window.}
#' }
#' @keywords internal
ground.entropy.curves <- function(
  counts,
  conversation_cols,
  num_codes,
  group_cols = NULL,
  methods = c("mean", "weighted", "pooled"),
  normalize = TRUE
) {
  # Reject unknown methods up front; otherwise nothing would be computed and
  # the peak lookup below would fail with an unrelated column error
  methods <- match.arg(methods, several.ok = TRUE)

  dt <- data.table::as.data.table(counts)

  if (is.null(group_cols)) {
    dt[, .group := "all"]
    group_cols <- ".group"
  }

  log_base <- if (normalize) log(2^num_codes) else 1

  # Shannon entropy of the ground-type distribution per conversation/window
  conv_entropy <- dt[
    ,
    {
      p <- count / sum(count)
      .(
        entropy = -sum(p * log(p)) / log_base,
        n_unique = .N,
        n_obs = unique(n_obs)
      )
    },
    by = c(conversation_cols, group_cols, "window")
  ]

  results <- list()

  # --- Mean ---
  if ("mean" %in% methods) {
    tmp <- conv_entropy[
      ,
      .(
        entropy = mean(entropy),
        n_unique = mean(n_unique),
        n_conversations = .N
      ),
      by = c(group_cols, "window")
    ]
    tmp[, method := "mean"]
    results[[length(results) + 1]] <- tmp
  }

  # --- Weighted ---
  if ("weighted" %in% methods) {
    tmp <- conv_entropy[
      ,
      .(
        entropy = weighted.mean(entropy, n_obs),
        n_unique = weighted.mean(n_unique, n_obs),
        n_conversations = .N
      ),
      by = c(group_cols, "window")
    ]
    tmp[, method := "weighted"]
    results[[length(results) + 1]] <- tmp
  }

  # --- Pooled ---
  if ("pooled" %in% methods) {
    # Sum counts over conversations first, then take one entropy per window
    tmp <- dt[
      ,
      .(count = sum(count)),
      by = c(group_cols, "window", "ground_type")
    ][
      ,
      {
        p <- count / sum(count)
        .(
          entropy = -sum(p * log(p)) / log_base,
          n_unique = .N
        )
      },
      by = c(group_cols, "window")
    ]
    tmp[, method := "pooled"]
    results[[length(results) + 1]] <- tmp
  }

  # fill = TRUE: the pooled result has no n_conversations column
  curves <- data.table::rbindlist(results, fill = TRUE)

  # First window with maximal entropy per group and method
  peaks <- curves[
    ,
    .SD[which.max(entropy)][1],
    by = c(group_cols, "method")
  ][
    ,
    .(
      peak_window = window,
      peak_entropy = entropy,
      method = method
    ),
    by = group_cols
  ]

  list(
    curves = curves,
    peaks = peaks,
    conversation_curves = conv_entropy
  )
}
194
#' Count Binary-Encoded Ground Types per Window Size
#'
#' For every window size from 1 to \code{max_window}, marks each code as active
#' on a row when it was present anywhere in the trailing window (within the
#' same conversation), encodes the resulting pattern of active codes as a single
#' number ("ground type"), and tallies how often each pattern occurs.
#'
#' @param data A data.frame or data.table.
#' @param codeNames A character vector of column names representing the binary codes.
#' @param conversation_cols A character vector of column names that define unique conversations.
#' @param max_window An integer specifying the maximum window size to evaluate. Default is 20.
#'
#' @return A data.table with one row per conversation, ground type and window,
#'   holding \code{count}, \code{window} and the per-conversation total \code{n_obs}.
#' @keywords internal
ground.type.counts <- function(
  data,
  codeNames,
  conversation_cols,
  max_window = 20
) {
  stopifnot(
    all(codeNames %in% names(data)),
    all(conversation_cols %in% names(data))
  )

  dt <- data.table::as.data.table(data)

  # The bit-weighted id below is only meaningful for 0/1 flags, so refuse
  # anything else before encoding.
  observed_values <- unique(as.vector(as.matrix(dt[, ..codeNames])))
  if (!all(observed_values %in% c(0, 1))) {
    stop("All codeNames columns must be binary (0/1); found non-binary values.")
  }

  # Code k contributes 2^(k-1), giving every pattern a distinct id
  bit_weights <- 2^(seq_along(codeNames) - 1)

  # "Was the code active anywhere in the trailing window?" A rolling sum > 0
  # answers that for binary input. Positions without a full window come back
  # NA from frollsum and are filled from the running maximum instead.
  active_in_window <- function(col, width) {
    flags <- as.numeric(data.table::frollsum(col, width, align = "right") > 0)
    incomplete <- which(is.na(flags))
    if (length(incomplete) > 0) {
      flags[incomplete] <- cummax(col[incomplete])
    }
    flags
  }

  per_window <- vector("list", max_window)

  for (w in seq_len(max_window)) {
    # Fresh subset each pass: the := below modifies it by reference
    windowed <- dt[, c(..conversation_cols, ..codeNames)]

    windowed[
      ,
      (codeNames) := lapply(.SD, function(col) active_in_window(col, w)),
      by = conversation_cols,
      .SDcols = codeNames
    ]

    pattern_matrix <- as.matrix(windowed[, ..codeNames])
    windowed[, ground_type := as.vector(pattern_matrix %*% bit_weights)]

    tally <- windowed[
      ,
      .(count = .N),
      by = c(conversation_cols, "ground_type")
    ]
    tally[, window := w]
    tally[, n_obs := sum(count), by = conversation_cols]

    per_window[[w]] <- tally
  }

  data.table::rbindlist(per_window, use.names = TRUE)
}

# Accumulate co-occurrence (adjacency) vectors for an ENAdata object.
#
# Reads the raw rows (`enadata$raw`), the code names (`enadata$codes`) and the
# accumulation settings exposed through `enadata$get()` (units.by,
# conversations.by, window.size, weight.by, ...). It then
#   1. builds one co-occurrence vector per raw row, using either the
#      single-row shortcut, whole-conversation sums, or a moving window,
#   2. optionally re-weights those vectors with a user supplied function,
#   3. sums the vectors per unit (EndPoint) or per unit and trajectory step
#      (trajectory models), and
#   4. stores the results on `enadata` (`accumulated.adjacency.vectors`,
#      `adjacency.vectors`, `adjacency.matrix`, `unit.names`, ...).
#
# Arguments:
#   enadata  ENAdata-like object providing `$raw`, `$codes`, `$model`,
#            `$get()`.
#
# Returns `enadata`, with the fields listed above filled in. Stops with an
# error when the raw data is NULL or empty, or when the model type is neither
# "EndPoint" nor one of `opts$TRAJ_TYPES[1:2]`.
accumulate.data <- function(enadata) {
  dfDT <- enadata$raw

  units.used <- enadata$get("units.used")
  units.by <- enadata$get("units.by")
  trajectory.by <- enadata$get("trajectory.by")
  codes <- enadata$codes

  if (is.data.frame(codes)) {
    codes <- colnames(codes)
  }

  conversations.by <- enadata$get("conversations.by")
  window <- enadata$get("window.size")
  weight_fun <- enadata$get("weight.by")

  # Trajectory steps default to the conversation columns.
  if (is.null(trajectory.by)) {
    trajectory.by <- conversations.by
  }

  # Binary accumulation is used only when weight.by is exactly "binary".
  binary <- identical(weight_fun, "binary")

  ### We need data
  if (is.null(dfDT) || nrow(dfDT) < 1) {
    stop("The provided data is NULL")
  }

  ###
  # We need a data.table, it's worth it.
  ###
  if (!data.table::is.data.table(dfDT)) {
    dfDT <- data.table::as.data.table(dfDT)
  }

  ###
  # Make a copy of the data for safe usage
  ###
  dfDT_codes <- data.table::copy(dfDT)

  ###
  # Create a column representing the ENA_UNIT as defined
  # by the `units.by` parameter
  ###
  if (!"ENA_UNIT" %in% colnames(dfDT_codes)) {
    dfDT_codes$ENA_UNIT <- enadata$raw$ENA_UNIT <- merge_columns_c(
      dfDT_codes,
      cols = units.by, sep = "::"
    )
  }

  ##
  # Names of the co-occurrence columns, one per unordered pair of codes.
  # sprintf() over seq_len() yields an empty vector when there are no pairs
  # (1:0 would have produced two bogus names).
  ##
  vL <- length(codes)
  adjacency.length <- ((vL * (vL + 1)) / 2) - vL
  codedTriNames <- sprintf("adjacency.code.%d", seq_len(adjacency.length))

  initial_cols <- c(units.by, codes)
  just_codes <- codes

  ##
  # Accumulated windows appended to the end of each row
  #
  # FIXME: Don't append on the results to the initial data.table,
  # keep a separate to lookup the results for the co-occurred
  # values later on.
  ##
  if (window$back == 1 && window$forward == 0) {
    # Window of a single row: co-occurrences come from each row on its own.
    dfDT.co.occurrences <- dfDT_codes[
      ,
      {
        ocs <- data.table::as.data.table(
          rows_to_co_occurrences(
            .SD[, .SD, .SDcols = codes, with = TRUE],
            binary = binary
          )
        )

        # Return value from data.table back to dfDT.co.occurrences
        data.table::data.table(.SD, ocs)
      },
      .SDcols = c(codes, conversations.by, trajectory.by),
      with = TRUE
    ]

    ### Generate the ENA_UNIT column
    dfDT.co.occurrences$ENA_UNIT <- dfDT_codes$ENA_UNIT

    ### Keep original columns used for units
    dfDT.co.occurrences[, (units.by) := dfDT_codes[, .SD, .SDcols = units.by]]
  } else if (window$back == "Conversation") {
    ###
    # First sum all lines by conversation and unit to get vectors of codes
    # occurring in the whole conversation for each unit
    ###
    dfDT.conv.sum <- dfDT_codes[
      ,
      lapply(.SD, sum),
      by = c(unique(conversations.by)),
      .SDcols = c(codes),
      with = TRUE
    ]

    ###
    # Convert each unit's conversation sums into adjacency vectors
    ###
    dfDT.co.occurrences <- dfDT.conv.sum[
      ,
      {
        ocs <- data.table::as.data.table(
          rows_to_co_occurrences(
            .SD[, .SD, .SDcols = codes, with = TRUE],
            binary = binary
          )
        )
        data.table::data.table(
          .SD, ocs,
          ENA_UNIT = merge_columns_c(.SD, cols = units.by, sep = "::")
        )
      },
      .SDcols = unique(c(codes, conversations.by, trajectory.by, units.by)),
      with = TRUE
    ]
  } else {
    ### Calculate occurrences of code within the provided window,
    ### conversation by conversation.
    dfDT.co.occurrences <- dfDT_codes[
      ,
      (codedTriNames) := ref_window_df(
        .SD[, .SD, .SDcols = just_codes],
        windowSize = window$back,
        windowForward = window$forward,
        binary = binary
      ),
      by = conversations.by,
      .SDcols = initial_cols,
      with = TRUE
    ]
  }

  # Optional user supplied weighting, applied to every co-occurrence column
  # one row at a time.
  if (is.function(weight_fun)) {
    cols <- colnames(dfDT.co.occurrences)[
      grep("adjacency.code", colnames(dfDT.co.occurrences))
    ]
    dfDT.co.occurrences <- dfDT.co.occurrences[
      ,
      (cols) := lapply(.SD, weight_fun),
      .SDcols = cols,
      by = seq_len(nrow(dfDT.co.occurrences))
    ]
  }

  ###
  # Convert the generic `V` names to corresponding `adjacency.code` names
  ###
  # NOTE(review): the pattern is unanchored, so any column whose name merely
  # contains "V<digit>" is counted as well -- confirm that is acceptable.
  vCols <- grep("V\\d+", colnames(dfDT.co.occurrences))
  if (length(vCols) == length(codedTriNames)) {
    colnames(dfDT.co.occurrences)[vCols] <- codedTriNames
  }

  ##
  # If units aren't supplied, use all available
  ##
  if (is.null(units.used)) {
    units.used <- dfDT_codes$ENA_UNIT
  }

  ###
  # Trajectory Checks
  ###

  if (enadata$model == "EndPoint") {
    ## Not a Trajectory: sum each unit found in dfDT.co.occurrences
    dfDT.summed.units <- dfDT.co.occurrences[
      ENA_UNIT %in% units.used,
      lapply(.SD, sum),
      by = units.by,
      .SDcols = codedTriNames
    ]
    dfDT.summed.units$ENA_UNIT <- merge_columns_c(
      dfDT.summed.units, units.by, sep = "::"
    )

    enadata$unit.names <- dfDT.summed.units$ENA_UNIT
  } else {
    ## Trajectory
    ## First sum all units within each Trajectory Group (trajectory.by)
    dfDT.summed.traj.by <- dfDT.co.occurrences[
      ENA_UNIT %in% units.used,
      {
        sums <- lapply(.SD, sum)
        data.frame(ENA_ROW_IDX = .GRP, sums) # Return value
      },
      by = c(units.by, trajectory.by),
      .SDcols = (codedTriNames)
    ]
    dfDT.summed.traj.by$ENA_UNIT <- merge_columns_c(
      dfDT.summed.traj.by, units.by, sep = "::"
    )
    dfDT.summed.traj.by$TRAJ_UNIT <- merge_columns_c(
      dfDT.summed.traj.by, trajectory.by, sep = "::"
    )

    enadata$trajectories$step <- dfDT.summed.traj.by$TRAJ_UNIT

    if (enadata$model == opts$TRAJ_TYPES[1]) {
      # Accumulated: each step carries the lagged result of ref_window_lag()
      # over the unit's steps so far.
      dfDT.summed.units <- dfDT.summed.traj.by[
        ENA_UNIT %in% unique(units.used),
        {
          cols <- colnames(.SD)
          ENA_UNIT <- paste(as.character(.BY), collapse = "::")
          TRAJ_UNIT <- .SD[, c(trajectory.by), with = FALSE]
          inc_cols <- cols[!cols %in% c(trajectory.by, "ENA_ROW_IDX")]
          lag <- ref_window_lag(.SD[, .SD, .SDcols = inc_cols], .N)

          data.table::data.table(
            ENA_ROW_IDX,
            TRAJ_UNIT, lag, ENA_UNIT = ENA_UNIT
          )
        },
        by = c(units.by),
        .SDcols = c(codedTriNames, trajectory.by, "ENA_ROW_IDX")
      ]
      dfDT.summed.units$TRAJ_UNIT <- merge_columns_c(
        dfDT.summed.units, trajectory.by, sep = "::"
      )
    } else if (enadata$model == opts$TRAJ_TYPES[2]) {
      # Non-accumulated: the per-step sums are used as they are.
      dfDT.summed.units <- dfDT.summed.traj.by
    } else {
      stop("Unsupported Model type.")
    }

    dfDT.summed.units$ENA_UNIT <- merge_columns_c(
      dfDT.summed.units, units.by, sep = "::"
    )
  }
  ###
  # END: Trajectory Checks
  ###

  ###
  # Name the rows and columns accordingly
  ###
  colnames(dfDT.summed.units)[
    grep("V\\d+", colnames(dfDT.summed.units))
  ] <- codedTriNames

  ###
  # Set attributes
  #
  # TODO Most of this should be moved to a more prominent spot on ENAdata
  ###
  # triIndices() is shifted by one to index `codes`; row 1 / row 2 name the
  # two codes behind each adjacency column.
  adjRows <- triIndices(length(codes)) + 1
  codedRow1 <- codes[adjRows[1, ]]
  codedRow2 <- codes[adjRows[2, ]]
  attr(dfDT.summed.units, "adjacency.matrix") <- rbind(codedRow1, codedRow2)
  attr(dfDT.summed.units, "adjacency.codes") <- codedTriNames
  attr(dfDT.summed.units, opts$UNIT_NAMES) <- dfDT.summed.units[
    ,
    .SD,
    with = TRUE, .SDcols = units.by
  ]

  enadata$adjacency.matrix <- rbind(codedRow1, codedRow2)
  enadata$accumulated.adjacency.vectors <- dfDT.co.occurrences
  enadata$adjacency.vectors <- dfDT.summed.units
  ###
  # END: Set attributes
  ###

  enadata
}

####
#' ENAdata R6class
#'
#' @docType class
#' @importFrom R6 R6Class
#' @import data.table
#' @export
#'
#' @field raw A data frame constructed from the unit, convo, code, and metadata parameters of ena.accumulate.data
#' @field adjacency.vectors A data frame of adjacency (co-occurrence) vectors summed per unit (per unit and trajectory step for trajectory models); after processing only the adjacency columns are kept
#' @field accumulated.adjacency.vectors A data frame of adjacency (co-occurrence) vectors by row, before they are summed per unit
#' @field model The type of ENA model: EndPoint, Accumulated Trajectory, or Separate Trajectory
#' @field units A data frame of columns that were combined to make the unique units. Includes column for trajectory selections. (unique)
#' @field unit.names A vector of unique unit values
#' @field metadata A data frame of unique metadata for each unit
#' @field trajectories A list: units - data frame, for a given row tells which trajectory it's a part; step - data frame, where along the trajectory a row sits
#'
#' @field adjacency.matrix A two-row matrix naming, for each adjacency column, the pair of codes it connects
#' @field adjacency.vectors.raw The per-unit adjacency vectors as accumulated, before the mask is applied and before non-adjacency columns are dropped
#' @field codes A vector of code names
#' @field function.call The string representation of function called and parameters provided
#' @field function.params A list of all parameters sent to function call
####
ENAdata <- R6::R6Class("ENAdata", public = list(

    #' Construct ENAdata
    #'
    #' @param file TBD
    #' @param units TBD
    #' @param units.used TBD
    #' @param units.by TBD
    #' @param conversations.by TBD
    #' @param codes TBD
    #' @param model TBD
    #' @param weight.by TBD
    #' @param window.size.back TBD
    #' @param window.size.forward TBD
    #' @param mask TBD
    #' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
    #' @param ... TBD
    #'
    #' @return The ENAdata object
    initialize = function(
      file,
      units = NULL,
      units.used = NULL,
      units.by = NULL,
      conversations.by = NULL,
      codes = NULL,
      model = NULL,
      weight.by = "binary",
      window.size.back = 1,
      window.size.forward = 0,
      mask = NULL,
      include.meta = TRUE,
      ...
    ) {
      args <- list(...)
      self$function.call <- sys.call(-1)
      self$function.params <- list()

      private$file <- file
      self$units <- units
      private$units.used <- units.used
      private$units.by <- units.by
      private$conversations.by <- conversations.by
      self$codes <- codes

      # Codes may be handed over as a data frame of code columns; only the
      # column names are kept.
      if (is.data.frame(self$codes)) {
        self$codes <- colnames(self$codes)
      }

      private$weight.by <- weight.by
      private$window.size <- list(
        "back" = window.size.back,
        "forward" = window.size.forward
      )

      # Record the parameters of this call. Only this function's own
      # arguments are considered (inherits = FALSE), so an unrelated global
      # that happens to be called e.g. `grainSize` is never picked up; names
      # that are not formal arguments are taken from `...` instead.
      for (p in c("units", "units.used", "units.by",
                  "conversations.by", "codes", "model", "weight.by",
                  "window.size.back", "window.size.forward", "mask",
                  "in.par", "grainSize", "include.meta")
      ) {
        if (exists(p, inherits = FALSE)) {
          self$function.params[[p]] <- get(p, inherits = FALSE)
        } else if (!is.null(args[[p]])) {
          self$function.params[[p]] <- args[[p]]
        }
      }

      self$model <- model

      private$mask <- mask

      return(self)
    },

    ## Public Properties ----
    model = NULL,
    raw = NULL,
    adjacency.vectors = NULL,
    adjacency.matrix = NULL,
    accumulated.adjacency.vectors = NULL,
    adjacency.vectors.raw = NULL,
    units = NULL,
    unit.names = NULL,
    metadata = NULL,
    trajectories = list(
      units = NULL,
      step = NULL
    ),
    codes = NULL,
    function.call = NULL,
    function.params = NULL,

    ## Public Functions ----

    #' Process accumulation
    #'
    #' @return ENAdata
    process = function() {
      private$loadFile()
    },

    #' Get property from object
    #'
    #' @param x character key to retrieve from object
    #' @return value from object at x
    get = function(x = "data") {
      return(private[[x]])
    },

    #' Add metadata
    #'
    #' @param merge logical (default: FALSE)
    #'
    #' @return data.frame
    add.metadata = function(merge = FALSE) {
      # Candidate metadata columns: everything that is not a code, a unit
      # column, a conversation column, or the generated ENA_UNIT id. A
      # logical mask is used so that "nothing to exclude" keeps all columns
      # (negative indexing with an empty which() would drop every column).
      raw_cols <- colnames(self$raw)
      reserved <- c(
        self$codes, private$units.by, private$conversations.by, "ENA_UNIT"
      )
      meta_avail <- raw_cols[!(raw_cols %in% reserved)]

      # Keep only the columns that take a single value within every unit.
      values_per_unit <- self$raw[
        ,
        lapply(.SD, data.table::uniqueN),
        by = c(private$units.by),
        .SDcols = meta_avail
      ][, c(meta_avail), with = FALSE]
      is_constant <- vapply(
        values_per_unit, function(x) all(x == 1), logical(1)
      )
      meta_cols_to_use <- meta_avail[is_constant]

      # One row per unit, restricted to units that were accumulated.
      raw.meta <- self$raw[!duplicated(ENA_UNIT)][
        ENA_UNIT %in% unique(
          self$accumulated.adjacency.vectors$ENA_UNIT
        ),
        c("ENA_UNIT", private$units.by, meta_cols_to_use),
        with = FALSE
      ]

      df_to_return <- raw.meta[ENA_UNIT %in% self$unit.names, ]

      return(df_to_return)
    }

  ),

  ### Private ----
  private = list(

    ## Private Properties ----
    file = NULL,
    window.size = NULL,
    units.used = NULL,
    units.by = NULL,
    conversations.by = NULL,
    weight.by = NULL,
    trajectory.by = NULL,
    mask = NULL,

    ## Private Functions ----
    # Load the input (data.table, data.frame, or a path readable by
    # read.csv), run the accumulation, apply the mask, attach metadata and
    # reduce `adjacency.vectors` to its adjacency columns.
    loadFile = function() {
      if (data.table::is.data.table(private$file)) {
        df_DT <- private$file
      } else {
        if (inherits(private$file, "data.frame")) {
          df <- private$file
        } else {
          df <- utils::read.csv(private$file)
        }
        df_DT <- data.table::as.data.table(df)
      }

      self$raw <- data.table::copy(df_DT)
      self$raw$ENA_UNIT <- merge_columns_c(self$raw, private$units.by, "::")

      self <- accumulate.data(self)
      self$units <- self$adjacency.vectors[, private$units.by, with = FALSE]

      if (!self$model %in% c("AccumulatedTrajectory", "SeparateTrajectory")) {
        self$unit.names <- self$adjacency.vectors$ENA_UNIT
      } else {
        # Trajectory models: a row is identified by unit and trajectory step.
        self$trajectories$units <- self$units
        conversation <- self$adjacency.vectors[
          , private$conversations.by,
          with = FALSE
        ]

        self$trajectories$step <- conversation
        self$units <- cbind(self$units, conversation)
        self$unit.names <- paste(
          self$adjacency.vectors$ENA_UNIT,
          self$adjacency.vectors$TRAJ_UNIT,
          sep = "::"
        )
      }

      self$adjacency.vectors.raw <- self$adjacency.vectors

      adjCols <- colnames(self$adjacency.vectors)[
        grep("adjacency.code", colnames(self$adjacency.vectors))
      ]

      # Default mask keeps every connection.
      if (is.null(private$mask)) {
        private$mask <- matrix(1,
          nrow = length(self$codes),
          ncol = length(self$codes),
          dimnames = list(self$codes, self$codes)
        )
      }

      # Multiply every adjacency column by its mask entry: the upper
      # triangle of the mask is repeated once per row, column by column.
      self$adjacency.vectors[, c(adjCols)] <-
        self$adjacency.vectors[, c(adjCols), with = FALSE] *
        rep(
          private$mask[upper.tri(private$mask)],
          rep(nrow(self$adjacency.vectors), length(adjCols))
        )

      if (self$function.params$include.meta == TRUE) {
        self$metadata <- self$add.metadata(merge = FALSE)
      } else {
        self$metadata <- data.frame()
      }

      # Only the adjacency columns remain on the public field.
      self$adjacency.vectors <- self$adjacency.vectors[
        ,
        grep("adjacency.code", colnames(self$adjacency.vectors)),
        with = FALSE
      ]

      return(self)
    }
  )
)

#' Apply metadata and code transformations to a data.table
#'
#' Converts `x` with `as.qe.data()` and then reclassifies the requested
#' columns, in this order: metadata, codes, units, horizon. A `NULL` column
#' argument skips that step; a non-`NULL` argument that is not a non-empty
#' numeric or character vector raises a warning instead.
#'
#' @param x A data.table. The data.table to be transformed.
#' @param metadata_cols A vector of column names or NULL. A vector specifying the columns for metadata transformations.
#' @param codes_cols A vector of column names or NULL. A vector specifying the columns for code transformations.
#' @param horizon_cols A vector of column names or NULL. A vector specifying the columns for horizon transformations.
#' @param units_cols A vector of column names or NULL. A vector specifying the columns for unit transformations.
#'
#' @return The modified data.table after applying the requested
#'   transformations, returned invisibly.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' dt <- define(dt, metadata_cols = c("a"), codes_cols = c("b"))
#' @export
define <- function(
  x,
  metadata_cols = find_meta_cols(x),
  codes_cols = find_binary_cols(x),
  horizon_cols = NULL,
  units_cols = NULL
) {
  x <- as.qe.data(x)

  # A usable column specification is a non-empty numeric or character vector.
  is_col_spec <- function(cols) {
    (is.numeric(cols) || is.character(cols)) && length(cols) > 0
  }

  # Call `reclassifier(x, col1, col2, ...)` on the current `x`.
  apply_to_cols <- function(cols, reclassifier) {
    do.call(reclassifier, c(list(x = x), unname(as.list(cols))))
  }

  # The default column arguments are evaluated lazily, so each one sees `x`
  # as left by the preceding steps; the order below therefore matters.
  if (!is.null(metadata_cols)) {
    if (is_col_spec(metadata_cols)) {
      x <- apply_to_cols(metadata_cols, metadata)
    } else {
      warning(WARNINGS$null_metadata)
    }
  }

  if (!is.null(codes_cols)) {
    if (is_col_spec(codes_cols)) {
      x <- apply_to_cols(codes_cols, codes)
    } else {
      warning(WARNINGS$null_codes)
    }
  }

  if (!is.null(units_cols)) {
    if (is_col_spec(units_cols)) {
      x <- apply_to_cols(units_cols, units)
    } else {
      warning(WARNINGS$null_units)
    }
  }

  if (!is.null(horizon_cols)) {
    if (is_col_spec(horizon_cols)) {
      x <- apply_to_cols(horizon_cols, horizon)
    } else {
      warning(WARNINGS$null_horizon)
    }
  }

  invisible(x)
}
85
#' Reclassify specified columns as codes or list codes columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.code' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.code'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.code', or a character vector of column names already classified as 'qe.code'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.code'
#' dt <- codes(dt, "a", "b")
#' # List columns classified as 'qe.code'
#' code_columns <- codes(dt)
#' @export
codes <- function(x, ...) {
  x <- as.qe.data(x)

  # Without column arguments, report which columns are already codes.
  # vapply() always yields a logical vector, also for a table with no
  # columns (sapply() would return an empty list there).
  if (...length() == 0) {
    return(colnames(x)[vapply(x, is.qe.code, logical(1))])
  }

  # Otherwise forward the columns to reclassify() with the code converter.
  reclassify_args <- list(...)
  reclassify_args$x <- x
  reclassify_args$v <- as.qe.code
  do.call(reclassify, reclassify_args)
}
120
#' Reclassify specified columns as metadata or list metadata columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.metadata' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.metadata'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.metadata', or a character vector of column names already classified as 'qe.metadata'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.metadata'
#' dt <- metadata(dt, "a", "b")
#' # List columns classified as 'qe.metadata'
#' metadata_columns <- metadata(dt)
#' @export
metadata <- function(x, ...) {
  x <- as.qe.data(x)

  # Without column arguments, report which columns are already metadata.
  # vapply() always yields a logical vector, also for a table with no
  # columns (sapply() would return an empty list there).
  if (...length() == 0) {
    return(colnames(x)[vapply(x, is.qe.metadata, logical(1))])
  }

  # Otherwise forward the columns to reclassify() with the metadata converter.
  reclassify_args <- list(...)
  reclassify_args$x <- x
  reclassify_args$v <- as.qe.metadata
  do.call(reclassify, reclassify_args)
}
154
#' Reclassify specified columns as units or list unit columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.unit' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.unit'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.unit', or a character vector of column names already classified as 'qe.unit'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.unit'
#' dt <- units(dt, "a", "b")
#' # List columns classified as 'qe.unit'
#' unit_columns <- units(dt)
#' @export
units <- function(x, ...) {
  x <- as.qe.data(x)

  # Without column arguments, report which columns are already units.
  # vapply() always yields a logical vector, also for a table with no
  # columns (sapply() would return an empty list there).
  if (...length() == 0) {
    return(colnames(x)[vapply(x, is.qe.unit, logical(1))])
  }

  # Otherwise forward the columns to reclassify() with the unit converter.
  reclassify_args <- list(...)
  reclassify_args$x <- x
  reclassify_args$v <- as.qe.unit
  do.call(reclassify, reclassify_args)
}
187
#' Reclassify specified columns as horizon or list horizon columns in a data.table
#'
#' This function reclassifies specified columns of a data.table to the 'qe.horizon' format if column names are provided.
#' If no column names are provided, it returns the names of columns that are already classified as 'qe.horizon'.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified or checked.
#' @param ... Additional arguments specifying the names of the columns to be reclassified.
#'
#' @return The modified data.table with specified columns reclassified as 'qe.horizon', or a character vector of column names already classified as 'qe.horizon'.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' # Reclassify columns 'a' and 'b' as 'qe.horizon'
#' dt <- horizon(dt, "a", "b")
#' # List columns classified as 'qe.horizon'
#' horizon_columns <- horizon(dt)
#' @export
horizon <- function(x, ...) {
  x <- as.qe.data(x)

  # Without column arguments, report which columns are already horizons.
  # vapply() always yields a logical vector, also for a table with no
  # columns (sapply() would return an empty list there).
  if (...length() == 0) {
    return(colnames(x)[vapply(x, is.qe.horizon, logical(1))])
  }

  # Otherwise forward the columns to reclassify() with the horizon converter.
  reclassify_args <- list(...)
  reclassify_args$x <- x
  reclassify_args$v <- as.qe.horizon
  do.call(reclassify, reclassify_args)
}

#' @export
'@.horizon' <- horizon
224
#' Reclassify specified columns in a data.table
#'
#' Replaces each named column of `x` with the result of applying `v` to it.
#' The replacement is done with `data.table::set()`.
#'
#' @param x A data.table. The data.table containing the columns to be reclassified.
#' @param v A function. The function to apply to each specified column for reclassification.
#' @param ... The columns to be reclassified, one per argument.
#'
#' @return The modified data.table with specified columns reclassified.
#' @examples
#' library(data.table)
#' dt <- data.table(a = 1:5, b = 6:10)
#' dt <- reclassify(dt, as.qe.code, "a", "b")
#' @export
reclassify <- function(x, v, ...) {
  for (column in list(...)) {
    converted <- v(x[[column]])
    data.table::set(x, j = column, value = converted)
  }

  x
}

##
#' @title Find conversations by unit
#'
#' @description Find rows of conversations by unit
#'
#' @details For every row that belongs to one of `units` and has at least one
#'   code present, the rows of its window (the row itself and the preceding
#'   rows of the same conversation) are collected. The window rows are kept
#'   only when every code in `codes` occurs at least once within them;
#'   otherwise the row is reported in `toRemove`.
#'
#' @param set An `ena.set` object (its `model$raw.input` is used) or a data
#'   frame of raw rows
#' @param units Character vector of unit ids, i.e. the values of the
#'   `units.by` columns joined with `"::"`
#' @param units.by Names of the columns that identify a unit. When NULL,
#'   `set` must be an `ena.set` and its stored `units.by` is used
#' @param codes Names of the code columns
#' @param conversation.by Names of the columns that define a conversation
#' @param window Number of rows in the window, counting the row itself; `Inf`
#'   reaches back to the start of the conversation
#' @param conversation.exclude Conversations (as indexed in the returned
#'   `conversations` list) whose rows are ignored
#'
#' @examples
#' data(RS.data)
#'
#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
#' 'Client.and.Consultant.Requests','Design.Reasoning',
#' 'Collaboration');
#'
#' accum = ena.accumulate.data(
#' units = RS.data[,c("Condition","UserName")],
#' conversation = RS.data[,c("Condition","GroupName")],
#' metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre",
#' "CONFIDENCE.Post","C.Change")],
#' codes = RS.data[,codeNames],
#' model = "EndPoint",
#' window.size.back = 4
#' );
#' set = ena.make.set(
#' enadata = accum,
#' rotation.by = ena.rotate.by.mean,
#' rotation.params = list(accum$meta.data$Condition=="FirstGame",
#' accum$meta.data$Condition=="SecondGame")
#' );
#' ena.conversations(set = RS.data,
#' units = c("FirstGame::steven z"), units.by=c("Condition","UserName"),
#' conversation.by = c("Condition","GroupName"),
#' codes=codeNames, window = 4
#' )
#'
#' @export
#' @return list with elements `conversations` (row indices per conversation),
#'   `unitConvs` (conversations containing coded rows of the units),
#'   `allRows` (rows of all windows in which every code occurred),
#'   `unitRows` (coded rows of the units) and `toRemove` (coded rows whose
#'   window did not contain every code)
##
ena.conversations <- function(set, units, units.by = NULL, codes = NULL, conversation.by = NULL, window = 4, conversation.exclude = c()) {
  if (is.null(units.by)) {
    if (!inherits(set, "ena.set")) {
      stop("If units.by is NULL, set must be an ena.set object")
    }
    units.by <- set$`_function.params`$units.by
  }

  if (inherits(set, "ena.set")) {
    raw_rows <- set$model$raw.input
  } else {
    raw_rows <- data.table::data.table(set)
  }

  # Conversation id of every row.
  raw_rows$KEYCOL <- merge_columns_c(raw_rows, conversation.by, sep = "::")

  # Row indices of every conversation, named by conversation id.
  conversations_table <- raw_rows[
    , paste(.I, collapse = ","),
    by = c(conversation.by)
  ]
  conversation_rows <- lapply(
    conversations_table$V1,
    function(x) as.numeric(unlist(strsplit(x, split = ",")))
  )
  names(conversation_rows) <- merge_columns_c(
    conversations_table, conversation.by, sep = "::"
  )

  # Unit id of every row.
  row_units <- merge_columns_c(
    raw_rows[, c(units.by), with = FALSE], units.by, sep = "::"
  )

  # Rows of the requested units with at least one code present, minus the
  # rows of excluded conversations.
  is_coded <- raw_rows[, rowSums(.SD), .SDcols = codes] > 0
  coded_unit_rows <- which(row_units %in% units & is_coded)
  excluded_rows <- as.vector(unlist(conversation_rows[conversation.exclude]))
  coded_unit_rows <- coded_unit_rows[!(coded_unit_rows %in% excluded_rows)]
  row_conversations <- raw_rows[coded_unit_rows, KEYCOL]

  # For the k-th coded row: the rows of its window when every code occurs in
  # that window, otherwise the row itself reported as not co-occurring.
  inspect_row <- function(k) {
    conv_rows <- conversation_rows[[row_conversations[k]]]
    pos <- which(conv_rows == coded_unit_rows[k])
    win <- if (is.infinite(window)) pos else window
    offsets <- rep(pos, win) - (win - 1):0
    all_codes_present <- all(
      raw_rows[
        conv_rows[offsets[offsets > 0]],
        lapply(.SD, sum),
        .SDcols = codes
      ] > 0
    )
    if (all_codes_present) {
      list(rows = conv_rows[offsets[offsets > 0]], missed = NULL)
    } else {
      list(rows = NULL, missed = conv_rows[pos])
    }
  }

  window_rows <- NULL
  rows_not_cooccurred <- NULL
  if (length(coded_unit_rows) > 0) {
    inspected <- lapply(seq_along(coded_unit_rows), inspect_row)
    window_rows <- unique(unlist(lapply(inspected, `[[`, "rows")))
    rows_not_cooccurred <- unlist(lapply(inspected, `[[`, "missed"))
  }

  return(list(
    conversations = as.list(conversation_rows),
    unitConvs = unique(row_conversations),
    allRows = window_rows,
    unitRows = coded_unit_rows,
    toRemove = rows_not_cooccurred
  ))
}

1		# Pure-R replacements for functions that were previously compiled C++ exports
2		# (rENA/src/ena.cpp). All math now lives in libqe; these wrappers preserve
3		# existing R-level function names so no call sites in R code need to change.
4		#
5		# Public API functions (exported) are marked @export.
6		# Internal functions (not exported) have no @export tag.
7
8		# ── public API ────────────────────────────────────────────────────────────────
9
#' Merge data frame columns
#'
#' Pastes the requested columns of a data frame or data.table together row by
#' row, placing `sep` between the values. Used internally to construct
#' unit-ID strings.
#'
#' @param df A data.frame or data.table
#' @param cols Character vector of column names to paste together
#' @param sep Separator string (default "::")
#' @return A character vector with one element per row of \code{df}
#' @export
merge_columns_c <- function(df, cols, sep = "::") {
  # Pull out each requested column, then hand them all to paste() at once.
  extract_column <- function(column_name) df[[column_name]]
  paste_args <- c(lapply(cols, extract_column), list(sep = sep))
  do.call(paste, paste_args)
}
23
#' Row-wise L2 (Sphere) Normalization
#'
#' Converts the input to a matrix and passes it to
#' `libqe::normalize_networks()`, which normalizes each row to unit L2 norm.
#'
#' @param dfM A data.frame or matrix
#' @return A numeric matrix with each row normalized to unit L2 length
#' @export
fun_sphere_norm <- function(dfM) {
  networks <- as.matrix(dfM)
  libqe::normalize_networks(networks)
}
34
#' Row-wise Max-Norm Scaling
#'
#' Converts the input to a matrix and passes it to `libqe::scale_networks()`,
#' which scales all rows by dividing by the largest row L2 norm.
#'
#' @param dfM A data.frame or matrix
#' @return A numeric matrix scaled by the largest row L2 norm
#' @export
fun_skip_sphere_norm <- function(dfM) {
  networks <- as.matrix(dfM)
  libqe::scale_networks(networks)
}
46
47		#' Upper Triangle from Vector (numeric)
48		#'
49		#' Compute pairwise products v[j] * v[i] for all j < i.
50		#'
51		#' @param v Numeric vector or single-row matrix
52		#' @return Numeric row vector of pairwise products
53		#' @export
vector_to_ut <- function(v) {
  # libqe is handed the matrix form of `v`.
  code_values <- as.matrix(v)
  libqe::code_connections(code_values)
}
57
58		#' Directed ENA node positions
59		#'
60		#' Least-squares node positions for directed ENA.
61		#'
62		#' @param line_weights Numeric matrix (units x connections)
63		#' @param points Numeric matrix of rotated points (units x dims)
64		#' @param numDims Number of dimensions
65		#' @return List with nodes, centroids, weights, points
66		#' @export
directed_node_positions <- function(line_weights, points, numDims) {
  # Pure delegation: all three arguments go to libqe unchanged, in order.
  libqe::directed_node_positions(
    line_weights,
    points,
    numDims
  )
}
70
71		#' Directed node positions with ground+response combined
72		#'
73		#' Directed node positions with paired ground+response rows combined.
74		#'
75		#' @param line_weights Numeric matrix (units x connections)
76		#' @param points Numeric matrix of rotated points (units x dims)
77		#' @param numDims Number of dimensions
78		#' @return List with nodes, centroids, weights, points
79		#' @export
directed_node_positions_with_ground_response_added <- function(
  line_weights, points, numDims
) {
  # Pure delegation to libqe's pair-combining variant; arguments are
  # forwarded unchanged, in order.
  libqe::directed_node_positions_combine_pairs(
    line_weights,
    points,
    numDims
  )
}
85
86		#' Calculate ENA correlations
87		#'
88		#' Pearson correlation with confidence interval between ENA points and
89		#' centroids.
90		#'
91		#' @param points Numeric matrix (units x dims)
92		#' @param centroids Numeric matrix (units x dims)
93		#' @param conf_level Confidence level (default 0.95)
94		#' @return Numeric matrix with columns: r, lower CI, upper CI
95		#' @export
ena_correlation <- function(points, centroids, conf_level = 0.95) {
  # Pure delegation: inputs are passed to libqe as received (no coercion).
  libqe::ena_correlation(
    points,
    centroids,
    conf_level
  )
}
99
100		#' Confidence intervals around group mean positions
101		#'
102		#' Per-dimension t-based confidence intervals around the column means of a
103		#' numeric matrix of ENA points.
104		#'
105		#' @param points Numeric matrix (units x dims)
106		#' @param conf_level Confidence level (default 0.95)
107		#' @return Numeric matrix (dims x 3): mean, lower CI, upper CI
108		#' @export
ena_mean_ci <- function(points, conf_level = 0.95) {
  # Coerce to a matrix, then let libqe compute the intervals.
  point_matrix <- as.matrix(points)
  libqe::mean_ci(point_matrix, conf_level)
}
112
113		#' Outlier (Tukey-fence) intervals for group positions
114		#'
115		#' Per-dimension Tukey-fence intervals: Q1 - kIQR to Q3 + kIQR.
116		#'
117		#' @param points Numeric matrix (units x dims)
118		#' @param iqr_factor IQR multiplier (default 1.5)
119		#' @return Numeric matrix (dims x 2): lower fence, upper fence
120		#' @export
ena_outlier_ci <- function(points, iqr_factor = 1.5) {
  # Coerce to a matrix, then let libqe compute the fences.
  point_matrix <- as.matrix(points)
  libqe::outlier_ci(point_matrix, iqr_factor)
}
124
125		#' Two-group comparison statistics for ENA points
126		#'
127		#' Per-dimension parametric (Welch t-test, Cohen's d) and non-parametric
128		#' (Wilcoxon rank-sum, rank-biserial r) statistics comparing two groups.
129		#'
130		#' @param g1 Numeric matrix of group 1 points (units x dims)
131		#' @param g2 Numeric matrix of group 2 points (units x dims)
132		#' @return List with: n1, n2, t, df, pvalue_t, cohens_d, means, sds,
133		#' U, pvalue_u, effect_r, medians — each a vector/matrix of length dims
134		#' @export
ena_group_stats <- function(g1, g2) {
  # Both groups are coerced to matrices before being compared by libqe.
  first_group <- as.matrix(g1)
  second_group <- as.matrix(g2)
  libqe::group_stats(first_group, second_group)
}
138
139		# ── internal (not exported) ───────────────────────────────────────────────────
140
141		# Per-row upper-triangle co-occurrence.
142		# @param df A data.frame or matrix of code columns
143		# @param binary If TRUE, binarise non-zero products
rows_to_co_occurrences <- function(df, binary = TRUE) {
  # Coerce the code columns to a matrix; `binary` is forwarded as given.
  code_matrix <- as.matrix(df)
  libqe::row_connections(code_matrix, binary)
}
147
148		# Stanza-window co-occurrence accumulation.
149		# @param df A data.frame or matrix of code columns
150		# @param windowSize Rows to look back (default 1; Inf = whole conversation)
151		# @param windowForward Rows to look forward (default 0)
152		# @param binary Binarise co-occurrence counts (default TRUE)
ref_window_df <- function(df, windowSize = 1, windowForward = 0,
                          binary = TRUE) {
  int_limit <- .Machine$integer.max

  # Infinite or over-large window sizes are capped at the largest integer;
  # anything else is converted with as.integer().
  to_window <- function(size) {
    if (is.infinite(size) || size >= int_limit) {
      return(int_limit)
    }
    as.integer(size)
  }

  rows_back <- to_window(windowSize)
  rows_forward <- to_window(windowForward)

  accumulated <- libqe::accumulate_stanza(
    as.matrix(df), rows_back, rows_forward, binary
  )
  data.table::as.data.table(accumulated)
}
162
163		# Rolling backward window sum of code columns.
164		# @param df A data.frame or matrix of code columns
165		# @param windowSize Number of rows to look back (default 0, treated as 1)
166		# @param binary Unused; kept for API compatibility
ref_window_lag <- function(df, windowSize = 0, binary = TRUE) {
  # `binary` is accepted but never forwarded to libqe.
  code_matrix <- as.matrix(df)
  libqe::rolling_window_sum(code_matrix, windowSize)
}
170
171		# Upper-triangle index pairs (0-based, +1 before use as R indices).
172		# @param len Side length of square code matrix
173		# @param row -1 = both rows, 0 = row indices, 1 = col indices
triIndices <- function(len, row = -1L) {
  # Pure delegation: both arguments are forwarded to libqe unchanged.
  libqe::connection_indices(
    len,
    row
  )
}
177
178		# Least-squares node positions (undirected ENA).
179		# @param adjMats Numeric matrix of line weights (units x connections)
180		# @param t Numeric matrix of rotated points (units x dims)
181		# @param numDims Number of dimensions
lws_lsq_positions <- function(adjMats, t, numDims) {
  # Pure delegation: arguments are forwarded to libqe unchanged, in order.
  libqe::node_positions(
    adjMats,
    t,
    numDims
  )
}
185
186		# String upper-triangle pairs: "A" "B" "C" -> "A & B" "A & C" "B & C".
187		# @param v Character vector of code names
svector_to_ut <- function(v) {
  # Pure delegation: `v` is handed to libqe as received.
  libqe::connection_names(
    v
  )
}
191
192		# Center data by subtracting column means.
193		# @param values Numeric matrix or data.frame
center_data_c <- function(values) {
  # Coerce to a matrix, then let libqe do the centering.
  value_matrix <- as.matrix(values)
  libqe::center_points(value_matrix)
}

1		##
2		#' @title Compute summary statistic for groupings of units using given method (typically, mean)
3		#'
4		#' @description Computes summary statistics for groupings (given as vector) of units in ena data using given method (typically, mean); computes summary statistic for point locations and edge weights for each grouping
5		#'
6		#' @export
7		#'
8		#' @param enaset An \code{\link{ENAset}} or a vector of values to group.
9		#' @param by A vector of values the same length as units. Uses rotated points for group positions and normed data to get the group edge weights
10		#' @param method A function, or the name of one, applied to each group of values. Default: mean(). If `enaset` is an ena.set, it is applied to both `enaset$points` and `enaset$line.weights`
11		#' @param names A vector of names to use for the results. Default: unique(by)
12		#'
13		#' @examples
14		#' data(RS.data)
15		#'
16		#' codeNames = c('Data','Technical.Constraints','Performance.Parameters',
17		#' 'Client.and.Consultant.Requests','Design.Reasoning','Collaboration');
18		#'
19		#' accum = ena.accumulate.data(
20		#' units = RS.data[,c("UserName","Condition")],
21		#' conversation = RS.data[,c("Condition","GroupName")],
22		#' metadata = RS.data[,c("CONFIDENCE.Change","CONFIDENCE.Pre","CONFIDENCE.Post")],
23		#' codes = RS.data[,codeNames],
24		#' window.size.back = 4
25		#' )
26		#'
27		#' set = ena.make.set(
28		#' enadata = accum
29		#' )
30		#'
31		#' means = ena.group(set, "Condition")
32		#'
33		#'
34		#' @return A list containing names, points, and edge weights for each of the unique groups formed by the function
35		##
ena.group <- function(
  enaset = NULL,
  by = NULL,
  method = mean,
  names = as.vector(unique(by))
) {
  # Summarise one table (points or line weights) per group. `by` and `names`
  # are read from the enclosing call; `m` defaults to the resolved `method`.
  run.method <- function(pts, m = method) {
    # Remember the leading class so it can be re-applied to the result.
    to_matrix <- class(pts)[1]
    points.dt <- pts

    if (is.logical(by)) {
      # Logical `by`: used as a row mask, all selected rows form one group.
      points.dt.means <- points.dt[
        by,
        { lapply(.SD, m) },
        .SDcols = find_dimension_cols(points.dt) | find_code_cols(points.dt)
      ]
      # The group label is only attached when exactly one name was supplied.
      if (length(names) == 1) {
        points.dt.means[["ENA_GROUP_NAME"]] <- as.ena.metadata(names)
      }
    } else if (all(by %in% colnames(pts))) {
      # `by` names columns of `pts`: group on those columns.
      points.dt.means <- points.dt[,
        {lapply(.SD, function(x) {
          # Re-apply the column's own class to the summarised value.
          get(paste0("as.", class(x)[1]))(m(x))
        })},
        by = by,
        .SDcols = find_dimension_cols(points.dt) | find_code_cols(points.dt)
      ]
      # Group label is the grouping column values joined with ".".
      points.dt.means[, ENA_GROUP_NAME := do.call(paste, c(.SD, sep = ".")), .SDcols = c(by)]
      points.dt.means <- points.dt.means[, !find_meta_cols(points.dt.means), with = FALSE]
      set(points.dt.means, j = "ENA_GROUP_NAME", value = as.ena.metadata(points.dt.means[["ENA_GROUP_NAME"]]))
    } else {
      # `by` is a vector of group labels, one per row.
      to_what <- get(paste0("as.", class(pts[[which(!find_meta_cols(pts))[1]]])[1]))
      # `names(` resolves to base::names here: R skips the non-function
      # `names` argument when looking up a function call.
      to_cols <- names(which(!find_meta_cols(pts)))

      points.dt.means <- as.data.frame(aggregate(as.matrix(points.dt), by = list(by), FUN = m))
      set(points.dt.means, j = "Group.1", value = as.ena.metadata(points.dt.means$Group.1))
      colnames(points.dt.means)[colnames(points.dt.means) == "Group.1"] <- "ENA_GROUP_NAME"
      # `[to_cols]` always yields a data.frame, so a single non-metadata
      # column is still converted column-wise instead of element-wise.
      set(x = points.dt.means, j = to_cols, value = lapply(points.dt.means[to_cols], to_what))
      points.dt.means <- as.data.table(points.dt.means)

      return(as.ena.matrix(points.dt.means[which(points.dt.means$ENA_GROUP_NAME %in% unique(by)), ], to_matrix))
    }

    return(as.ena.matrix(points.dt.means, to_matrix))
  }

  if (is.character(method)) {
    # match.fun() only resolves functions and searches from the caller's
    # environment, so a name like "names" cannot pick up a local argument.
    method <- match.fun(method)
  }

  # Legacy R6 sets are converted to the list-based ena.set first.
  if (is(enaset, "ENAset")) {
    enaset <- ena.set(enaset)
  }

  if (is(enaset, "ena.set")) {
    pts <- run.method(enaset$points)
    return(list(
      "names" = pts$ENA_GROUP_NAME,
      "points" = pts,
      "line.weights" = run.method(enaset$line.weights)
    ))
  } else {
    return(run.method(enaset))
  }
}

1		#' Re-class matrix as ena.matrix
2		#'
3		#' @param x data.frame, data.table, or matrix to extend
4		#' @param new.class Additional class to extend the matrix with, default: NULL
5		#'
6		#' @return \code{x} with \code{new.class} and "ena.matrix" added to the front of its class vector
7		#' @export
as.ena.matrix <- function(x, new.class = NULL) {
  # unique() keeps the first occurrence of each class name, so re-classing an
  # object that already carries these classes does not stack duplicates.
  class(x) <- unique(c(new.class, "ena.matrix", class(x)))
  x
}
12
13		#' Re-class vector as ena.metadata
14		#'
15		#' @param x Vector to re-class; factors are converted to character first
16		#'
17		#' @return \code{x} with class \code{c("ena.metadata", "character")}
18		#' @export
as.ena.metadata <- function(x) {
  values <- if (is.factor(x)) as.character(x) else x
  # The class vector is set outright rather than prepended to class(x); per
  # the earlier author's note, extending class(x) breaks `$.ena.metadata`.
  class(values) <- c("ena.metadata", "character")
  values
}
# Tag a vector as "ena.code"; factors are converted to character first.
as.ena.code <- function(x) {
  values <- if (is.factor(x)) as.character(x) else x
  class(values) <- c("ena.code", class(values))
  values
}
# Tag a vector as "ena.codes"; factors are converted to character first.
as.ena.codes <- function(x) {
  values <- if (is.factor(x)) as.character(x) else x
  class(values) <- c("ena.codes", class(values))
  values
}
40		#' Re-class vector as ena.co.occurrence
41		#'
42		#' @param x Vector to re-class
43		#'
44		#' @return re-classed vector
45		#' @export
as.ena.co.occurrence <- function(x) {
  # Factors are flattened to their labels before the class is prepended.
  values <- if (is.factor(x)) as.character(x) else x
  class(values) <- c("ena.co.occurrence", class(values))
  values
}
53		#' Re-class vector as ena.dimension
54		#'
55		#' @param x Vector to re-class
56		#'
57		#' @return re-classed vector
58		#' @export
as.ena.dimension <- function(x) {
  # Factors are flattened to their labels before the class is prepended.
  values <- if (is.factor(x)) as.character(x) else x
  class(values) <- c("ena.dimension", class(values))
  values
}

1		#####
2		#' @title Wrapper to generate, and optionally plot, an ENA model
3		#'
4		#' @description Convenience entry point for constructing an ENA model from a
5		#' coded data frame. Handles accumulation, dimensional reduction, and optional
6		#' plot generation in a single call, returning an \code{ena.set} object that
7		#' contains unit positions, network weights, node positions, and plots.
8		#'
9		#' @details
10		#' \code{ena()} runs three phases internally:
11		#'
12		#' \strong{1. Accumulation} — co-occurrence counts are computed for each unit
13		#' across stanza windows defined by \code{window}, \code{window.size.back}, and
14		#' \code{window.size.forward}.
15		#'
16		#' \strong{2. Dimensional reduction} — accumulated vectors are normed, centered,
17		#' and rotated into a low-dimensional ENA space. When \code{groupVar} and two
18		#' \code{groups} are supplied the rotation maximises separation between the group
19		#' means (means rotation); otherwise SVD is used.
20		#'
21		#' \strong{3. Plotting} — plots are built and stored on the returned set in
22		#' \code{set$plots}. Pass \code{include.plots = FALSE} to skip this phase
23		#' entirely, which is useful for programmatic use (simulations, parameter
24		#' sweeps) where plot objects are not needed.
25		#'
26		#' \strong{Plot defaults:} \code{network = TRUE} but \code{points = FALSE} and
27		#' \code{mean = FALSE}. For a two-group comparison you almost always want
28		#' \code{mean = TRUE} as well, to show group centroids and confidence intervals
29		#' alongside the network.
30		#'
31		#' \strong{Accessing results:} the returned \code{ena.set} object contains:
32		#' \describe{
33		#' \item{\code{$points}}{unit positions in the rotated ENA space (rows = units)}
34		#' \item{\code{$line.weights}}{normed co-occurrence weights per unit (rows = units, cols = code pairs)}
35		#' \item{\code{$node.positions}}{positions of each code node in the ENA space}
36		#' \item{\code{$plots}}{named list of \code{ENAplot} objects; two-group models
37		#' produce three plots keyed by \code{group1}, \code{group2}, and
38		#' \code{"group1-group2"}}
39		#' \item{\code{$tests}}{list of Wilcoxon and t-test results on dimensions 1
40		#' and 2, populated when \code{runTest = TRUE}}
41		#' \item{\code{$variance}}{proportion of variance explained by each dimension}
42		#' }
43		#'
44		#' @param data data.frame containing metadata and coded columns
45		#' @param codes vector, numeric or character, of column names or indices containing the codes to model
46		#' @param units vector, numeric or character, of column names that together uniquely identify each unit of analysis
47		#' @param conversation vector, numeric or character, of column names used to segment the data into conversations (stanza boundaries reset at each new conversation)
48		#' @param metadata vector, numeric or character, of column names to carry through as unit-level metadata (default: NULL)
49		#' @param model character, the ENA model to construct: \code{EndPoint} (default) produces a single adjacency vector per unit summing co-occurrences across all lines; \code{AccumulatedTrajectory} produces one adjacency vector per unit per conversation, where each successive conversation accumulates prior ones; \code{SeparateTrajectory} produces one adjacency vector per unit per conversation, each modeled independently
50		#' @param weight.by how to weight co-occurrences: \code{"binary"} (default) counts each co-occurrence once per stanza window; supply a function (e.g. \code{sum}) to use raw counts
51		#' @param window stanza window type: \code{"MovingStanzaWindow"} (default) or \code{"Conversation"} (all lines in a conversation form one window)
52		#' @param window.size.back integer, number of lines back from each line to include in the stanza window (default: 1)
53		#' @param window.size.forward integer, number of lines forward from each line to include in the stanza window (default: 0). Set to model bidirectional co-occurrence within a window.
54		#' @param include.meta logical, if TRUE (default) unit metadata is attached to the resulting ENAdata object and accessible via the set; set to FALSE to omit metadata from the model output
55		#' @param groupVar character, name of the column containing group labels. When supplied with two \code{groups}, the model uses a means rotation that maximises variance between group means.
56		#' @param groups vector, character, of exactly the group values from \code{groupVar} to use for means rotation, plotting, and statistical tests. If omitted, the first two unique values of \code{groupVar} are used with a warning.
57		#' @param runTest logical, if TRUE runs a Wilcoxon rank-sum test and a Student's t-test comparing the two groups on dimensions 1 and 2; results stored in \code{set$tests} (default: FALSE)
58		#' @param points logical, TRUE will plot individual unit points (default: FALSE)
59		#' @param mean logical, TRUE will plot group mean positions with confidence intervals — recommended whenever \code{groupVar} is supplied (default: FALSE)
60		#' @param network logical, TRUE will plot mean networks (default: TRUE)
61		#' @param networkMultiplier numeric, scaling factor applied to edge weights in non-subtracted network plots (default: 1)
62		#' @param subtractionMultiplier numeric, scaling factor applied to edge weights in the subtracted network plot (default: 1)
63		#' @param unit character, name of a single unit to plot in isolation; when supplied, all group plotting is skipped
64		#' @param colors vector, character, of colors for groups or points. For two-group models, supply two values (group1, group2); for single-group or no-group models, supply one value. Defaults to "blue"/"red" for two groups and "black" otherwise.
65		#' @param confidence.interval character, style of confidence interval shown on mean points: "box" (default), "crosshairs", or "none"
66		#' @param include.plots logical, if TRUE (default) generates and attaches plot objects to the returned set; set to FALSE to skip all plotting for faster programmatic use
67		#' @param print.plots logical, if TRUE renders plots in the Viewer as they are created (default: FALSE)
68		#' @param ... Additional parameters passed to set creation and plotting functions, including \code{mask} (an optional binary matrix of size ncol(codes) x ncol(codes) where 0 suppresses co-occurrence modeling between a pair of codes; see \code{\link{ena.accumulate.data}})
69		#'
70		#' @examples
71		#' data(RS.data)
72		#'
73		#' codes = c('Data',
74		#' 'Technical.Constraints',
75		#' 'Performance.Parameters',
76		#' 'Client.and.Consultant.Requests',
77		#' 'Design.Reasoning',
78		#' 'Collaboration')
79		#'
80		#' # Minimal call: fit a model with no group comparison
81		#' rs = ena(
82		#' data = RS.data,
83		#' units = c("UserName", "Condition", "GroupName"),
84		#' conversation = c("Condition", "GroupName"),
85		#' codes = codes,
86		#' window.size.back = 4
87		#' )
88		#'
89		#' # Two-group comparison with means rotation, centroids, and statistical tests
90		#' rs = ena(
91		#' data = RS.data,
92		#' units = c("UserName", "Condition", "GroupName"),
93		#' conversation = c("Condition", "GroupName"),
94		#' codes = codes,
95		#' window.size.back = 4,
96		#' groupVar = "Condition",
97		#' groups = c("FirstGame", "SecondGame"),
98		#' mean = TRUE,
99		#' runTest = TRUE,
100		#' print.plots = FALSE
101		#' )
102		#'
103		#' # Model fitting only, no plots (faster for programmatic use)
104		#' rs = ena(
105		#' data = RS.data,
106		#' units = c("UserName", "Condition", "GroupName"),
107		#' conversation = c("Condition", "GroupName"),
108		#' codes = codes,
109		#' window.size.back = 4,
110		#' include.plots = FALSE
111		#' )
112		#'
113		#' @return An \code{ena.set} object. See the Details section for a description
114		#' of the key fields (\code{$points}, \code{$line.weights}, \code{$plots},
115		#' \code{$tests}, etc.).
116		#' @export
117		#####
ena <- function(
  data,
  codes,
  units,
  conversation,
  metadata = NULL,
  model = c("EndPoint", "AccumulatedTrajectory", "SeparateTrajectory"),
  weight.by = "binary",
  window = c("MovingStanzaWindow", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  include.meta = TRUE,
  groupVar = NULL,
  groups = NULL,
  runTest = FALSE,
  points = FALSE,
  mean = FALSE,
  network = TRUE,
  networkMultiplier = 1,
  subtractionMultiplier = 1,
  unit = NULL,
  colors = NULL,
  confidence.interval = "box",
  # TRUE/FALSE rather than T/F: the short forms are ordinary variables that
  # can be reassigned, which would silently change these defaults.
  include.plots = TRUE,
  print.plots = FALSE,
  ...
) {
  # Build the set; `...` is forwarded to the set creator as well as the
  # plotter below.
  set <- ena.set.creator(
    data = data,
    codes = codes,
    units = units,
    conversation = conversation,
    metadata = metadata,
    model = model,
    weight.by = weight.by,
    window = window,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    include.meta = include.meta,
    groupVar = groupVar,
    groups = groups,
    runTest = runTest,
    ...
  )

  # Plotting is optional; when skipped the set is returned as created.
  if (include.plots) {
    set <- ena.plotter(
      set = set,
      groupVar = groupVar,
      groups = groups,
      points = points,
      mean = mean,
      network = network,
      networkMultiplier = networkMultiplier,
      subtractionMultiplier = subtractionMultiplier,
      unit = unit,
      colors = colors,
      confidence.interval = confidence.interval,
      print.plots = print.plots,
      ...
    )
  }

  return(set)
}

1		##
2		# @title Accumulate Data from csv
3		#
4		# @description This function accumulates rows of data.
5		#
6		# @details [TBD]
7		#
8		#@export
9		#
10		# @param file The csv file location or data.frame for the function
11		# @param units.used Delimits columns based on the units (which specific units to use)
12		# @param units.by unit columns to accumulate by
13		# @param conversations.by Columns used in the conversation
14		# @param codes Columns used based on codes
15		# @param window.size.back Number of lines back to include window in stanza
16		# @param window.size.forward Number of lines forward in stanza window
17		# @param binary [TBD]
18		# @param model [TBD]
19		# @param window [TBD]
20		# @param weight.by [TBD]
21		# @param binary.stanzas [TBD]
22		# @param mask [TBD]
23		# @param ... additional parameters addressed in inner function
24		#
25		#
26		# @seealso \code{\link{ena.make.set}}
27		#
28		# @examples
29		# \dontrun{
30		# codeNames = c(
31		# "E.data","S.data","E.design","S.design","S.professional","E.client",
32		# "V.client","E.consultant","V.consultant","S.collaboration","I.engineer",
33		# "I.intern","K.actuator","K.rom","K.materials","K.power"
34		# )
35		#
36		# df.file <- system.file("extdata", "rs.data.csv", package="rENA")
37		#
38		# # Given a csv file location
39		# ena.accumulate.data(
40		# df.file, units.by = c("UserName","Condition"),
41		# conversations.by = c("ActivityNumber","GroupName"),
42		# codes = codeNames
43		# )
44		# }
45		# @return \code{\link{ENAdata}} class object with accumulated data
46		#
47		##
ena.accumulate.data.file <- function(
  file,
  units.used = NULL,
  conversations.used = NULL,
  units.by,
  conversations.by,
  codes = NULL,
  model = c("EndPoint",
            "AccumulatedTrajectory",
            "SeparateTrajectory"),
  window = c("Moving Stanza", "Conversation"),
  window.size.back = 1,
  window.size.forward = 0,
  weight.by = "binary",
  # TRUE/FALSE rather than T/F: the short forms are reassignable variables.
  binary.stanzas = FALSE,
  mask = NULL,
  include.meta = TRUE,
  as.list = TRUE,
  ...
) {
  # All four inputs are mandatory; fail before any work is done.
  if (is.null(file) ||
      is.null(units.by) ||
      is.null(conversations.by) ||
      is.null(codes)
  ) {
    stop("Accumulation: file, units.by, conversations.by, and codes must be provided")
  }

  units <- NULL
  model <- match.arg(model)
  window <- match.arg(window)

  if (identical(window, "Conversation")) {
    # Whole-conversation window: the unit columns are appended to the
    # conversation columns, and the window size is replaced by the string
    # "Conversation" before being passed on.
    conversations.by <- c(conversations.by, units.by)
    window.size.back <- window
  }

  # NOTE(review): `conversations.used` and `binary.stanzas` are accepted by
  # this function but not forwarded to ENAdata$new() - confirm that is intended.
  data <- ENAdata$new(
    file = file,
    units = units,
    units.used = units.used,
    units.by = units.by,
    conversations.by = conversations.by,
    codes = codes,
    window.size.back = window.size.back,
    window.size.forward = window.size.forward,
    weight.by = weight.by,
    model = model,
    mask = mask,
    include.meta = include.meta,
    ...
  )
  data$process()

  # Record the call that produced this object.
  data$function.call <- sys.call()

  if (as.list) {
    data <- ena.set(data)
  } else {
    warning("Usage of R6 data objects is deprecated and may be removed entirely in a future version. Consider upgrading to the new data object.")
  }
  data
}

# S3 class names used by the as.qe.*() converters and is.qe.*() predicates.
CLASS_NAMES <- as.list(c(
  data    = "qe.data",
  meta    = "qe.metadata",
  code    = "qe.code",
  unit    = "qe.unit",
  horizon = "qe.horizon"
))
8
# Warning texts, keyed by the situation that triggers them.
WARNINGS <- as.list(c(
  data_from_vector = "Cannot transform vectors to `qe.data`",
  null_metadata = "`metadata` must be supplied as a vector of column names. No metadata classified.",
  null_codes = "`codes` must be supplied as a vector of column names. No codes classified.",
  null_units = "`units` must be supplied as a vector of column names. No units classified.",
  null_horizon = "`horizon` must be supplied as a vector of column names. No horizon classified."
))
16
17		#' Convert an object to 'qe.data' class
18		#'
19		#' This function converts an object to the 'qe.data' class. Matrices and plain data.frames are first converted to a data.table; vectors are returned unchanged with a warning.
20		#'
21		#' @param x An object. The object to be converted to 'qe.data' class.
22		#'
23		#' @return The modified object with the 'qe.data' class.
24		#' @examples
25		#' library(data.table)
26		#'
27		#' dt <- data.table(
28		#' ID = 1:5,
29		#' Name = c("Alice", "Bob", "Charlie", "David", "Eve"),
30		#' Age = c(25, 30, 35, 40, 45),
31		#' Score = c(85, 90, 95, 80, 75)
32		#' )
33		#' dt <- as.qe.data(dt);
34		#' class(dt) # Should show 'qe.data' along with other classes
35		#'
36		#' @export
as.qe.data <- function(x) {
  # Already tagged: hand it back untouched.
  if (is.qe.data(x)) {
    return(x)
  }

  # Vectors are not converted; warn and return the input as-is.
  if (is.vector(x)) {
    warning(WARNINGS$data_from_vector)
    return(x)
  }

  # Matrices and plain data.frames become data.tables before tagging;
  # existing data.tables are tagged directly.
  is_plain_frame <- is.data.frame(x) && !data.table::is.data.table(x)
  if (is.matrix(x) || is_plain_frame) {
    x <- data.table::as.data.table(x)
  }

  class(x) <- c(CLASS_NAMES$data, class(x))
  return(x)
}
56
57		#' Convert a vector to 'qe.code' class
58		#'
59		#' This function converts a vector to the 'qe.code' class. If the vector is a factor, it is first converted to a character vector.
60		#'
61		#' @param x A vector. The vector to be converted to 'qe.code' class.
62		#'
63		#' @return The modified vector with the 'qe.code' class.
64		#' @examples
65		#' vec <- factor(c("A", "B", "C"))
66		#' vec <- as.qe.code(vec)
67		#' class(vec) # Should show 'qe.code' along with other classes
68		#' @export
as.qe.code <- function(x) {
  if (!is.qe.code(x)) {
    # Factors are flattened to their labels before the class is prepended.
    if (is.factor(x)) {
      x <- as.character(x)
    }
    class(x) <- c(CLASS_NAMES$code, class(x))
  }
  x
}
79
80		#' Convert a vector to 'qe.metadata' class
81		#'
82		#' This function converts a vector to the 'qe.metadata' class. If the vector is a factor, it is first converted to a character vector.
83		#'
84		#' @param x A vector. The vector to be converted to 'qe.metadata' class.
85		#'
86		#' @return The modified vector with the 'qe.metadata' class.
87		#' @examples
88		#' vec <- factor(c("A", "B", "C"))
89		#' vec <- as.qe.metadata(vec)
90		#' class(vec) # Should show 'qe.metadata' along with other classes
91		#' @export
as.qe.metadata <- function(x) {
  if (!is.qe.metadata(x)) {
    # Factors are flattened to their labels before the class is prepended.
    if (is.factor(x)) {
      x <- as.character(x)
    }
    class(x) <- c(CLASS_NAMES$meta, class(x))
  }
  x
}
102
103		#' Convert a vector to 'qe.unit' class
104		#'
105		#' This function converts a vector to the 'qe.unit' class. If the vector is a factor, it is first converted to a character vector.
106		#'
107		#' @param x A vector. The vector to be converted to 'qe.unit' class.
108		#'
109		#' @return The modified vector with the 'qe.unit' class.
110		#' @examples
111		#' vec <- factor(c("A", "B", "C"))
112		#' vec <- as.qe.unit(vec)
113		#' class(vec) # Should show 'qe.unit' along with other classes
114		#' @export
as.qe.unit <- function(x) {
  if (!is.qe.unit(x)) {
    # Factors are flattened to their labels before the class is prepended.
    if (is.factor(x)) {
      x <- as.character(x)
    }
    class(x) <- c(CLASS_NAMES$unit, class(x))
  }
  x
}
125
126		#' Convert a vector to 'qe.horizon' class
127		#'
128		#' This function converts a vector to the 'qe.horizon' class. If the vector is a factor, it is first converted to a character vector.
129		#'
130		#' @param x A vector. The vector to be converted to 'qe.horizon' class.
131		#'
132		#' @return The modified vector with the 'qe.horizon' class.
133		#' @examples
134		#' vec <- factor(c("A", "B", "C"))
135		#' vec <- as.qe.horizon(vec)
136		#' class(vec) # Should show 'qe.horizon' along with other classes
137		#' @export
as.qe.horizon <- function(x) {
  if (!is.qe.horizon(x)) {
    # Factors are flattened to their labels before the class is prepended.
    if (is.factor(x)) {
      x <- as.character(x)
    }
    class(x) <- c(CLASS_NAMES$horizon, class(x))
  }
  x
}
148
149		#' Check if an object is of class 'qe.data'
150		#'
151		#' This function checks if an object is of class 'qe.data'.
152		#'
153		#' @param x An object. The object to be checked.
154		#'
155		#' @return A logical value. TRUE if the object is of class 'qe.data', otherwise FALSE.
156		#' @examples
157		#' library(data.table)
158		#'
159		#' dt <- data.table(ID = 1:5)
160		#' class(dt) <- c("qe.data", class(dt))
161		#' is.qe.data(dt) # Should return TRUE
162		#' @export
is.qe.data <- function(x) {
  # Membership test against the whole class vector, not just its first entry.
  match(CLASS_NAMES$data, class(x), nomatch = 0L) > 0L
}
166
167		#' Check if an object is of class 'qe.code'
168		#'
169		#' This function checks if an object is of class 'qe.code'.
170		#'
171		#' @param x An object. The object to be checked.
172		#'
173		#' @return A logical value. TRUE if the object is of class 'qe.code', otherwise FALSE.
174		#' @examples
175		#' dt <- 1:5
176		#' class(dt) <- c("qe.code", class(dt))
177		#' is.qe.code(dt) # Should return TRUE
178		#' @export
is.qe.code <- function(x) {
  # Membership test against the whole class vector, not just its first entry.
  match(CLASS_NAMES$code, class(x), nomatch = 0L) > 0L
}
182
183		#' Check if an object is of class 'qe.metadata'
184		#'
185		#' This function checks if an object is of class 'qe.metadata'.
186		#'
187		#' @param x An object. The object to be checked.
188		#'
189		#' @return A logical value. TRUE if the object is of class 'qe.metadata', otherwise FALSE.
190		#' @examples
191		#' dt <- 1:5
192		#' class(dt) <- c("qe.metadata", class(dt))
193		#' is.qe.metadata(dt) # Should return TRUE
194		#' @export
is.qe.metadata <- function(x) {
  # Membership test against the whole class vector, not just its first entry.
  match(CLASS_NAMES$meta, class(x), nomatch = 0L) > 0L
}
198
199
200		#' Check if an object is of class 'qe.unit'
201		#'
202		#' This function checks if an object is of class 'qe.unit'.
203		#'
204		#' @param x An object. The object to be checked.
205		#'
206		#' @return A logical value. TRUE if the object is of class 'qe.unit', otherwise FALSE.
207		#' @examples
208		#' dt <- 1:5
209		#' class(dt) <- c("qe.unit", class(dt))
210		#' is.qe.unit(dt) # Should return TRUE
211		#' @export
is.qe.unit <- function(x) {
  # Membership test against the whole class vector, not just its first entry.
  match(CLASS_NAMES$unit, class(x), nomatch = 0L) > 0L
}
215
216		#' Check if an object is of class 'qe.horizon'
217		#'
218		#' This function checks if an object is of class 'qe.horizon'.
219		#'
220		#' @param x An object. The object to be checked.
221		#'
222		#' @return A logical value. TRUE if the object is of class 'qe.horizon', otherwise FALSE.
223		#' @examples
224		#' dt <- 1:5
225		#' class(dt) <- c("qe.horizon", class(dt))
226		#' is.qe.horizon(dt) # Should return TRUE
227		#' @export
is.qe.horizon <- function(x) {
  # Membership test against the whole class vector, not just its first entry.
  match(CLASS_NAMES$horizon, class(x), nomatch = 0L) > 0L
}