libqe coverage - 66.10%

Files
Source

/** @file accumulation.hpp
 *  @brief Accumulation primitives shared between the rENA stanza-window model
 *         and the tma ground/response/tensor model.
 *
 *  Both models reduce to the same core connection_matrix() operation; they
 *  differ in how ground and response vectors are assembled from the raw data.
 */
#ifndef LIBQE_ACCUMULATION_HPP
#define LIBQE_ACCUMULATION_HPP

#include <armadillo>
#include <functional>
#include <vector>
#include "adjacency.hpp"

namespace qe {

/// @name Core adjacency math
/// @{

/** @brief Compute the connection matrix for one ground + response event pair.
 *
 *  @param[in] ground          Row vector of accumulated ground codes (length p).
 *  @param[in] response        Row vector of the focal (response) codes (length p).
 *  @param[in] response_weight Scalar weight applied to the response self-connection
 *                             term.  Defaults to 1.0.
 *  @param[in] ordered         When @c true, produces a directed (asymmetric) matrix:
 *                             @c ground→response cross-product plus
 *                             @c 0.5 * response_weight * response self-connection
 *                             (diagonal zeroed).
 *                             When @c false, produces a symmetric undirected matrix:
 *                             @c (response_weight * r⊗r) + g⊗r + r⊗g.
 *
 *  @returns A p × p connection matrix.
 *
 *  @note Equivalent to @c calculate_adjacency_matrix() in @c tma/code.cpp.
 */
inline arma::mat connection_matrix(
    arma::rowvec ground, arma::rowvec response,
    double response_weight = 1.0, bool ordered = true
) {
    arma::mat resp_self = response.t() * response;
    arma::mat g_by_r   = ground.t() * response;

    if (ordered) {
        resp_self.diag().zeros();
        return g_by_r + (0.5 * response_weight * resp_self);
    }
    return (response_weight * resp_self) + g_by_r + (response.t() * ground);
}

/// @}

/// @name Traditional stanza-window accumulation (rENA model)
/// @{

/** @brief Accumulate connection vectors for every row in one conversation using
 *         a stanza window.
 *
 *  For each focal row @c k in @p codes the function assembles a ground context
 *  from the surrounding window and calls connection_matrix().
 *
 *  @par Ordering semantics
 *  - @b Undirected (@p ordered = @c false, default — rENA stanza model): the
 *    window spans [@c k - window_back, @c k + window_forward]; the upper-triangle
 *    outer-product (code_connections) is computed and back/forward-reference
 *    corrections are subtracted.  Returns a matrix of shape
 *    @c n_rows × choose_two(n_codes).
 *  - @b Directed (@p ordered = @c true): focal row @c k is the response; the
 *    sum of prior rows [@c earliest, @c k−1] is the ground.
 *    connection_matrix() is called with @p ordered = @c true and the result is
 *    vectorised.  @p window_forward is ignored (future rows cannot be causal
 *    ground context).  Returns a matrix of shape @c n_rows × n_codes².
 *
 *  @param[in] codes          Code matrix for one conversation (n_rows × n_codes).
 *  @param[in] window_back    Number of prior rows included in the window
 *                            (1 = current row only; INT_MAX = all prior rows).
 *  @param[in] window_forward Number of future rows included in the window
 *                            (ignored when @p ordered is @c true).
 *  @param[in] binary         When @c true, clamp all positive entries to 1.
 *  @param[in] ordered        When @c true, use directed accumulation; when
 *                            @c false, use undirected stanza-window accumulation.
 *
 *  @returns Connection matrix (n_rows × connection_vector_length) where
 *           connection_vector_length is choose_two(n_codes) for undirected or
 *           n_codes² for directed.
 *
 *  @note Equivalent to @c ref_window_df() in @c rENA/ena.cpp (undirected case).
 */
inline arma::mat accumulate_stanza(
    arma::mat codes,
    int window_back    = 1,
    int window_forward = 0,
    bool binary        = true,
    bool ordered       = false
) {
    int n_rows  = codes.n_rows;
    int n_codes = codes.n_cols;
    const int INT_MAX_VAL = std::numeric_limits<int>::max();

    // Shared helper: earliest row index for focal row k
    auto get_earliest = [&](int row) -> int {
        if (window_back == INT_MAX_VAL || window_back == 0)
            return (window_back == 0) ? row : 0;
        return std::max(0, row - (window_back - 1));
    };

    if (ordered) {
        arma::mat out(n_rows, n_codes * n_codes, arma::fill::zeros);
        for (int row = 0; row < n_rows; row++) {
            int earliest = get_earliest(row);
            arma::rowvec response = codes.row(row);
            arma::rowvec ground(n_codes, arma::fill::zeros);
            if (row > earliest)
                ground = arma::sum(codes.rows(earliest, row - 1));
            out.row(row) = arma::vectorise(
                connection_matrix(ground, response, 1.0, true)).t();
        }
        if (binary) out.elem(arma::find(out > 0)).ones();
        return out;
    }

    // Undirected: existing rENA stanza-window logic
    int n_tri = choose_two(n_codes);
    arma::mat out(n_rows, n_tri, arma::fill::zeros);

    for (int row = 0; row < n_rows; row++) {
        int earliest = get_earliest(row);

        int latest = row;
        if (window_forward == INT_MAX_VAL || row + window_forward >= n_rows) {
            latest = n_rows - 1;
        } else if (window_forward > 0) {
            latest = std::min(n_rows - 1, row + window_forward);
        }

        arma::mat window_rows = codes.rows(earliest, latest);
        arma::mat summed      = arma::sum(window_rows);
        arma::rowvec to_ut    = code_connections(summed);

        // Back-reference correction
        int win_rows  = latest - earliest + 1;
        if (win_rows > 0 && window_back > 1 && row - 1 >= 0) {
            int head_rows = win_rows - 1 - window_forward;
            if (head_rows > 0) {
                arma::mat refs    = window_rows.head_rows(head_rows);
                arma::mat ref_sum = arma::sum(refs);
                to_ut -= code_connections(ref_sum);
            }
        }

        // Forward-reference correction
        if (window_forward > 0 && latest <= n_rows - 1) {
            int tail_rows = latest - row;
            if (tail_rows > 0) {
                arma::mat refs    = window_rows.tail_rows(tail_rows);
                arma::mat ref_sum = arma::sum(refs);
                to_ut -= code_connections(ref_sum);
            }
        }

        out.row(row) = to_ut;
    }

    if (binary) out.elem(arma::find(out > 0)).ones();
    return out;
}

/// @}

/// @name Ground/response accumulation (tma model)
/// @{

/** @brief Accumulate connections for a single unit from its context rows.
 *
 *  For every response row belonging to this unit, the function computes a
 *  weighted ground vector from all preceding context rows and calls
 *  connection_matrix().
 *
 *  @par Ordering semantics
 *  When @p ordered is @c false (default), the result is folded into the
 *  upper-triangle representation (length choose_two(p)).  When @p ordered is
 *  @c true, the full directed p² vector is returned.
 *
 *  @param[in] codes      Full context matrix visible to this unit (n × p).
 *  @param[in] unit_rows  0-based indices of the rows that belong to this unit.
 *  @param[in] decay_fn   Callback with signature
 *                        @c arma::vec(arma::vec distances) that maps a vector
 *                        of distances (0 = current row, 1 = one step back, …)
 *                        to a vector of scalar weights; default behaviour is a
 *                        simple rectangular window.
 *  @param[in] ordered    When @c true, return a directed flat vector (length p²);
 *                        when @c false, return an undirected upper-triangle flat
 *                        vector (length choose_two(p)).
 *
 *  @returns Flat connection vector of length choose_two(p) (undirected) or p²
 *           (directed).
 *
 *  @note Equivalent to @c accumulate_network() in @c tma/code.cpp, with the R
 *        function callback replaced by @c std::function.
 */
inline arma::rowvec accumulate_unit(
    const arma::mat& codes,
    const std::vector<int>& unit_rows,
    std::function<arma::vec(arma::vec)> decay_fn,
    bool ordered = false
) {
    int code_cnt = codes.n_cols;
    arma::mat g_w_vec(code_cnt, code_cnt, arma::fill::zeros);

    for (int unit_row : unit_rows) {
        if (unit_row == 0) continue;

        arma::rowvec response = codes.row(unit_row);

        // Ground rows: everything from 0 up to (and including) unit_row
        arma::uvec ground_indices = arma::regspace<arma::uvec>(0, 1, unit_row);
        arma::vec  distances(ground_indices.n_elem);
        for (arma::uword k = 0; k < ground_indices.n_elem; k++)
            distances[k] = static_cast<double>(unit_row - ground_indices[k]);

        arma::vec decay_weights = decay_fn(distances);

        arma::mat ground_codes   = codes.rows(ground_indices);
        arma::mat weighted       = ground_codes.each_col() % decay_weights;
        arma::rowvec g_summed    = arma::sum(weighted);

        // Exclude the response row's own contribution from the ground
        arma::rowvec g_no_resp   = g_summed - weighted.tail_rows(1).row(0);

        g_w_vec += connection_matrix(g_no_resp, response, 1.0, ordered);
    }

    if (!ordered) return fold_directed_network(arma::vectorise(g_w_vec));
    return arma::vectorise(g_w_vec).t();
}

/// @}

/// @name Extended ground/response accumulation — returns per-row connection data
/// @{

/** @brief Result of accumulate_unit_with_rows().
 *
 *  Bundles the unit-level flat connection vector together with the per-response-row
 *  full p² connection matrices needed by tma's @c accumulate_network().
 */
struct UnitNetworks {
    arma::rowvec networks;      ///< Flat connection vector (choose_two(p) or p²).
    arma::mat    row_networks;  ///< Per-response-row full p² matrix (n_unit_rows × p²).
};

/** @brief Like accumulate_unit() but also returns the per-response-row connection
 *         matrix needed by tma's @c accumulate_network().
 *
 *  @param[in] codes      Full context matrix visible to this unit (n × p).
 *  @param[in] unit_rows  0-based indices of the rows that belong to this unit.
 *  @param[in] decay_fn   Callback with signature
 *                        @c arma::vec(int unit_row, arma::uvec ground_indices)
 *                        that returns a weight vector of length
 *                        @c ground_indices.n_elem.  The two-argument form lets
 *                        callers (e.g. the tma Rcpp wrapper) set R environment
 *                        variables before invoking the actual R decay function,
 *                        without any R-specific code leaking into libqe.
 *  @param[in] ordered    When @c true, return directed flat vectors (length p²);
 *                        when @c false, return undirected upper-triangle flat
 *                        vectors (length choose_two(p)).
 *
 *  @returns A UnitNetworks struct containing the aggregated connection vector
 *           and the per-response-row connection matrices.
 *
 *  @note Equivalent to @c accumulate_network() in @c tma/code.cpp (extended form).
 */
inline UnitNetworks accumulate_unit_with_rows(
    const arma::mat&                              codes,
    const std::vector<int>&                       unit_rows,
    std::function<arma::vec(int, arma::uvec)>     decay_fn,
    bool ordered = false
) {
    int code_cnt    = codes.n_cols;
    int n_unit_rows = unit_rows.size();

    arma::mat g_w_mat(code_cnt, code_cnt, arma::fill::zeros);
    arma::mat row_networks(n_unit_rows, code_cnt * code_cnt, arma::fill::zeros);

    for (int i = 0; i < n_unit_rows; ++i) {
        int unit_row = unit_rows[i];

        arma::uvec ground_indices = arma::regspace<arma::uvec>(0, 1, unit_row);
        arma::vec  decay_weights  = decay_fn(unit_row, ground_indices);

        arma::mat    ground_codes = codes.rows(ground_indices);
        arma::mat    weighted     = ground_codes.each_col() % decay_weights;
        arma::rowvec g_summed     = arma::sum(weighted);
        arma::rowvec g_no_resp    = g_summed - weighted.tail_rows(1).row(0);

        arma::rowvec response = codes.row(unit_row);
        arma::mat    conn     = connection_matrix(g_no_resp, response, 1.0, ordered);
        g_w_mat += conn;
        row_networks.row(i) = arma::vectorise(conn).t();
    }

    UnitNetworks result;
    if (!ordered)
        result.networks = fold_directed_network(arma::vectorise(g_w_mat));
    else
        result.networks = arma::vectorise(g_w_mat).t();
    result.row_networks = row_networks;
    return result;
}

/// @}

/// @name Tensor-based multi-modal accumulation (tma model)
/// @{

/** @brief Compute the linear index into a column-major multi-dimensional array.
 *
 *  @param[in] indices  Per-dimension index values (0-based).
 *  @param[in] dims     Size of each dimension.
 *
 *  @returns The scalar column-major flat index.
 *
 *  @note Equivalent to @c flat_index() in @c tma/code.cpp.
 */
inline int flat_index(const std::vector<int>& indices,
                               const std::vector<int>& dims) {
    if (indices.size() != dims.size())
        throw std::invalid_argument("Number of indices must match number of dimensions.");
    size_t linear = 0, stride = 1;
    for (size_t v = 0; v < indices.size(); ++v) {
        linear += static_cast<size_t>(indices[v]) * stride;
        stride *= static_cast<size_t>(dims[v]);
    }
    return static_cast<int>(linear);
}

/** @brief Result of apply_tensor_unit().
 *
 *  Bundles the unit-level aggregated connection vector together with the
 *  per-response-row connection matrices used by the tma tensor accumulation.
 */
struct TensorNetworks {
    arma::rowvec connection_counts;      ///< Unit-level flat vector (p²).
    arma::mat    row_connection_counts;  ///< Per-response-row connection matrix (n_unit_rows × p²).
};

/** @brief Pure-C++ port of the tma @c apply_tensor() inner logic.
 *
 *  For each response row in @p unit_rows, the function looks up window and
 *  weight values from the tensor, collects all ground rows that fall within the
 *  response row's window, and calls connection_matrix() to accumulate the
 *  connection counts.
 *
 *  @param[in] tensor          Flat column-major array containing window and
 *                             weight values indexed by the factor dimensions.
 *  @param[in] dims            Sizes of each dimension of @p tensor.
 *  @param[in] dims_sender     Tensor axis indices corresponding to sender factors.
 *  @param[in] dims_receiver   Tensor axis indices corresponding to receiver
 *                             factors; these axes are overridden to the response
 *                             row's values when looking up window sizes for
 *                             ground rows.
 *  @param[in] dims_mode       Tensor axis indices corresponding to mode factors.
 *  @param[in] context_lookup  Integer matrix (n_context_rows × n_factors, 0-based)
 *                             mapping each context row to its factor level indices.
 *  @param[in] unit_rows       0-based response-row indices for this unit.
 *  @param[in] codes           Full context code matrix (n_context_rows × n_codes).
 *  @param[in] times           Timestamp associated with each context row
 *                             (length n_context_rows).
 *  @param[in] ordered         When @c true, produce a directed full p² result;
 *                             when @c false, produce an undirected upper-triangle
 *                             result.
 *
 *  @returns A TensorNetworks struct containing the unit-level connection counts
 *           and the per-response-row connection counts.
 *
 *  @note Equivalent to the inner loop of @c apply_tensor() in @c tma/code.cpp.
 */
inline TensorNetworks apply_tensor_unit(
    const arma::vec&        tensor,
    const std::vector<int>& dims,
    const std::vector<int>& dims_sender,
    const std::vector<int>& dims_receiver,
    const std::vector<int>& dims_mode,
    const arma::imat&       context_lookup,
    const std::vector<int>& unit_rows,
    const arma::mat&        codes,
    const arma::vec&        times,
    bool ordered = true
) {
    const int  WINDOW_DIM  = 1;
    const int  WEIGHT_DIM  = 0;
    const bool IS_DEFAULT  = (dims.size() == 1 && dims[0] == 2);

    int code_cnt    = codes.n_cols;
    int n_unit_rows = static_cast<int>(unit_rows.size());
    int ctx_cols    = static_cast<int>(context_lookup.n_cols);

    arma::mat g_w_mat(code_cnt, code_cnt, arma::fill::zeros);
    arma::mat row_conn(n_unit_rows, code_cnt * code_cnt, arma::fill::zeros);

    int response_win    = 0;
    int response_weight = 0;
    if (IS_DEFAULT) {
        response_win    = static_cast<int>(tensor[1]);
        response_weight = static_cast<int>(tensor[0]);
    }

    for (int i = 0; i < n_unit_rows; ++i) {
        int    ri            = unit_rows[i];
        double response_time = times[ri];

        std::vector<int> resp_ctx(ctx_cols + 1);
        for (int j = 0; j < ctx_cols; ++j) resp_ctx[j] = context_lookup(ri, j);
        resp_ctx[ctx_cols] = WINDOW_DIM;

        if (!IS_DEFAULT)
            response_win = static_cast<int>(tensor[flat_index(resp_ctx, dims)]);

        std::vector<int>    gri_v;
        std::vector<double> grw_v;
        arma::rowvec g_ws(code_cnt, arma::fill::zeros);

        if (ri > 0) {
            for (int gr = ri - 1; gr >= 0; --gr) {
                std::vector<int> row_v(ctx_cols + 1);
                for (int j = 0; j < ctx_cols; ++j) row_v[j] = context_lookup(gr, j);
                row_v[ctx_cols] = WINDOW_DIM;
                for (int dim : dims_receiver) row_v[dim] = resp_ctx[dim];

                double row_win = static_cast<double>(response_win);
                if (!IS_DEFAULT)
                    row_win = tensor[flat_index(row_v, dims)];

                if (times[gr] + row_win > response_time) {
                    gri_v.push_back(gr);
                    row_v[ctx_cols] = WEIGHT_DIM;
                    double row_wgt = static_cast<double>(response_weight);
                    if (!IS_DEFAULT)
                        row_wgt = tensor[flat_index(row_v, dims)];
                    grw_v.push_back(row_wgt);
                }
            }

            if (!gri_v.empty()) {
                arma::uvec gri_u(gri_v.size());
                for (size_t k = 0; k < gri_v.size(); ++k) gri_u[k] = gri_v[k];
                arma::mat    gc  = codes.rows(gri_u);
                arma::colvec wts = arma::conv_to<arma::colvec>::from(grw_v);
                g_ws = arma::sum(gc.each_col() % wts, 0);
            }
        }

        if (!IS_DEFAULT) {
            resp_ctx[ctx_cols] = WEIGHT_DIM;
            response_weight = static_cast<int>(tensor[flat_index(resp_ctx, dims)]);
        }

        arma::rowvec row_vec = codes.row(ri);
        arma::mat resp = connection_matrix(
            g_ws, row_vec, static_cast<double>(response_weight), ordered);
        g_w_mat += resp;
        row_conn.row(i) = arma::vectorise(resp).t();
    }

    TensorNetworks result;
    result.connection_counts     = arma::vectorise(g_w_mat).t();
    result.row_connection_counts = row_conn;
    return result;
}

/// @}

/// @name Per-row co-occurrence and rolling window (rENA accumulation primitives)
/// @{

/** @brief Compute the upper-triangle co-occurrence vector for each row.
 *
 *  For each row, calls code_connections() and optionally binarizes the result.
 *
 *  @param[in] codes   Code matrix (n_rows × n_codes).
 *  @param[in] binary  When @c true, clamp all positive entries to 1.
 *
 *  @returns Matrix of shape n_rows × choose_two(n_codes).
 *
 *  @note Equivalent to @c rows_to_co_occurrences() in @c rENA/ena.cpp.
 */
inline arma::mat row_connections(
    const arma::mat& codes,
    bool binary = true
) {
    int n_rows = codes.n_rows;
    int n_tri  = choose_two(codes.n_cols);
    arma::mat out(n_rows, n_tri, arma::fill::zeros);
    for (int row = 0; row < n_rows; ++row)
        out.row(row) = code_connections(codes.row(row));
    if (binary) out.elem(arma::find(out > 0)).ones();
    return out;
}

/** @brief Rolling backward window sum of the raw code matrix.
 *
 *  For each focal row @c k, sums rows [@c max(0, k - window_size + 1), @c k].
 *  Returns a matrix of the same shape as @p codes — no upper-triangle transform
 *  is applied.  A @p window_size of 0 or less is treated as 1 (current row only).
 *
 *  @param[in] codes        Code matrix (n_rows × n_codes).
 *  @param[in] window_size  Number of rows to include in the backward window
 *                          (including the focal row).  Values <= 0 are clamped
 *                          to 1.
 *
 *  @returns Matrix of the same shape as @p codes containing the windowed sums.
 *
 *  @note Equivalent to @c ref_window_lag() in @c rENA/ena.cpp.
 */
inline arma::mat rolling_window_sum(
    const arma::mat& codes,
    int window_size = 1
) {
    int n_rows = codes.n_rows;
    arma::mat out(n_rows, codes.n_cols, arma::fill::zeros);
    for (int row = 0; row < n_rows; ++row) {
        int earliest = (window_size > 0)
            ? std::max(0, row - (window_size - 1))
            : row;  // guard: window_size <= 0 -> single row
        out.row(row) = arma::sum(codes.rows(earliest, row));
    }
    return out;
}

/// @}

} // namespace qe

#endif // LIBQE_ACCUMULATION_HPP

/**
 * @file adjacency.hpp
 * @brief Combinatorics and vector/matrix utilities for ENA connection networks.
 *
 * Pure C++ / Armadillo — no Rcpp dependency.
 * Include @c <RcppArmadillo.h> before this header when building inside an R package.
 */
#ifndef LIBQE_ADJACENCY_HPP
#define LIBQE_ADJACENCY_HPP

#include <armadillo>
#include <string>
#include <vector>
#include <cmath>

namespace qe {

// ---------------------------------------------------------------------------
/// @name Combinatorics
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Number of unordered pairs from @p n items (n choose 2).
 *
 * @param n Number of codes/nodes.
 * @returns @c (n * (n - 1)) / 2.
 */
inline int choose_two(int n) {
    return (n * (n - 1)) / 2;
}

/**
 * @brief Upper-triangle index pairs for a symmetric matrix of side @p len.
 *
 * @param len Side length of the square matrix (number of codes).
 * @param row Controls what is returned:
 *   - @c -1 (default): 2 × k matrix of [row_idx; col_idx].
 *   - @c  0: row indices only (1 × k).
 *   - @c  1: column indices only (1 × k).
 * @returns An @c arma::umat of shape 2×k or 1×k depending on @p row.
 * @note Equivalent to @c triIndices() in rENA/ena.cpp and tma/code.cpp.
 */
inline arma::umat connection_indices(int len, int row = -1) {
    int vS = choose_two(len);
    int s  = 0;

    arma::umat vR(2, vS, arma::fill::zeros);
    arma::umat vRone(1, vS, arma::fill::zeros);

    for (int i = 2; i <= len; i++) {
        for (int j = 0; j < i - 1; j++) {
            vR(0, s) = j;
            vR(1, s) = i - 1;
            if (row == 0)      vRone[s] = j;
            else if (row == 1) vRone[s] = i - 1;
            s++;
        }
    }
    return (row == -1) ? vR : vRone;
}

/// @}

// ---------------------------------------------------------------------------
/// @name Vector ↔ upper-triangle conversions
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Compute pairwise products and return as a flat upper-triangle vector.
 *
 * For each pair @c (j, i) with @c j < i, outputs @c v[j] * v[i].
 * The result has length @c choose_two(v.size()).
 *
 * @param v A 1 × p row vector (or p-element matrix).
 * @returns A row vector of length @c choose_two(p).
 * @note Equivalent to @c vector_to_ut() in rENA/ena.cpp.
 */
inline arma::rowvec code_connections(arma::mat v) {
    int vL = v.size();
    int vS = choose_two(vL);
    arma::rowvec out(vS, arma::fill::zeros);
    int s = 0;
    for (int i = 2; i <= vL; i++) {
        for (int j = 0; j < i - 1; j++) {
            out[s] = v[j] * v[i - 1];
            s++;
        }
    }
    return out;
}

/**
 * @brief Fold a directed n² vector into an undirected upper-triangle vector.
 *
 * Symmetric elements are summed: A→B + B→A for every pair.
 *
 * @param v Flat directed vector of length n², column-major (n = sqrt(v.size())).
 * @returns Upper-triangle row vector of length @c choose_two(n).
 * @note Equivalent to @c vector_to_summed_uppertri() in tma/code.cpp.
 */
inline arma::rowvec fold_directed_network(arma::vec v) {
    int n    = static_cast<int>(std::round(std::sqrt(static_cast<double>(v.size()))));
    int tris = choose_two(n);

    arma::mat m(v);
    m.reshape(n, n);
    m = m + m.t();

    arma::colvec flat     = m.as_col();
    arma::uvec   up_inds  = arma::trimatu_ind(arma::size(m), 1);
    return arma::conv_to<arma::rowvec>::from(flat.elem(up_inds));
}

/**
 * @brief Flatten an adjacency matrix to a vector.
 *
 * @param x Square adjacency matrix (n × n).
 * @param full If @c true (default), returns the full n² vector (directed).
 *             If @c false, returns the upper-triangle only (undirected).
 * @returns Row vector of length n² or @c choose_two(n).
 * @note Equivalent to @c adjacency_matrix_to_vector() in tma/code.cpp.
 */
inline arma::rowvec network_to_vector(arma::mat x, bool full = true) {
    if (full) return arma::vectorise(x).t();
    arma::mat combined  = arma::trimatu(x, 1) + arma::trimatl(x, -1);
    arma::uvec up_inds  = arma::trimatu_ind(arma::size(combined), 1);
    return arma::vectorise(combined(up_inds)).t();
}

/// @}

// ---------------------------------------------------------------------------
/// @name String utilities
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Generate "A & B" pair names for every upper-triangle position.
 *
 * For code names @c {"A", "B", "C"} returns @c {"A & B", "A & C", "B & C"}.
 *
 * @param v Ordered list of code names (length p).
 * @returns Vector of @c choose_two(p) label strings.
 * @note Equivalent to @c svector_to_ut() in rENA/ena.cpp.
 */
inline std::vector<std::string> connection_names(std::vector<std::string> v) {
    int vL = v.size();
    int vS = choose_two(vL);
    std::vector<std::string> out(vS);
    int s = 0;
    for (int i = 2; i <= vL; i++) {
        for (int j = 0; j < i - 1; j++) {
            out[s] = v[j] + " & " + v[i - 1];
            s++;
        }
    }
    return out;
}

/// @}

} // namespace qe

#endif // LIBQE_ADJACENCY_HPP

/**
 * @file generalized_rotation.hpp
 * @brief Generalized Means Rotation (GMR) with Lasso-based covariate adjustment.
 *
 * @details
 * Mirrors rENA's `ena.rotate.by.generalized()` + `gmr()` + `get_x1_main_effect()`.
 * Authoritative reference: commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901
 * ("Fixed bugs in rotation functions") by Zhiqiang Cai.
 *
 * **Algorithm overview**
 *
 *  1. Compute x_vector: the rotation axis that best represents the target
 *     variable's contribution to the ENA point space, after controlling for
 *     covariates via Lasso (`lasso_x1_contribution`).
 *  2. Deflate V by x_vector → defA = V - V*x*x'.
 *  3. Optionally compute secondary axis x1: the normalised group-mean
 *     difference in the *original* V space (not defA) for groups 0 vs 1.
 *     x1 stays empty when the target is not categorical or there are not
 *     exactly 2 groups — there is no SVD fallback.
 *  4. Compute y_vector from a GMR call on defA, or from SVD of defA.
 *     Explicitly orthogonalise y_vector against x_vector, and against x1
 *     when x1 is available.
 *  5. Double-deflate V by x_vector and y_vector, then call `complete_rotation()`
 *     to fill remaining dimensions from the SVD of the doubly-deflated space.
 *
 * **Caller responsibilities**
 *
 *  - Build the model matrix (`X_raw`) from the data frame, including dummy
 *    variables for categorical predictors and any desired interaction terms.
 *    No formula parsing is done here.
 *  - Identify `x1_cols`: the 0-based column indices in `X_raw` that correspond to
 *    the target variable (these are unpenalized in the Lasso).
 *  - Encode categorical targets as 0-based integer codes in `x_target`.
 *  - Set `x_n_groups` to the number of distinct groups when `x_categorical=true`.
 *
 * The R wrapper (`rENA::ena.rotate.by.generalized`) handles these steps and
 * calls `libqe::generalized_means_rotation()` with the prepared matrices.
 */

#ifndef LIBQE_GENERALIZED_ROTATION_HPP
#define LIBQE_GENERALIZED_ROTATION_HPP

#include "linalg_fallback.hpp"  // qe::linalg::eig_sym (works w/o LAPACK)
#include "rotation.hpp"         // ena_svd, complete_rotation, RotationResult
#include "lasso.hpp"            // lasso_x1_contribution

#include <armadillo>
#include <stdexcept>
#include <string>
#include <vector>

namespace qe {

/// @name Between-group scatter
/// @{

/**
 * @brief Compute the between-group scatter matrix S_B.
 *
 * @details
 * Computes S_B = Σ_g  n_g * (μ_g − μ)(μ_g − μ)'  where μ is the grand mean.
 *
 * @note Mirrors rENA's `compute_SB()` (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
 *
 * @param[in] V        n×p matrix of ENA points (or covariate-adjusted contribution).
 * @param[in] labels   n×1 0-based integer group labels.
 * @param[in] n_groups Number of distinct groups.
 *
 * @returns p×p symmetric between-group scatter matrix.
 */
inline arma::mat between_group_scatter(
    const arma::mat&  V,
    const arma::uvec& labels,
    arma::uword       n_groups
) {
    const arma::uword p = V.n_cols;
    arma::rowvec mu = arma::mean(V, 0);
    arma::mat SB(p, p, arma::fill::zeros);

    for (arma::uword g = 0; g < n_groups; ++g) {
        arma::uvec idx = arma::find(labels == g);
        if (idx.is_empty()) continue;
        arma::vec diff = (arma::mean(V.rows(idx), 0) - mu).t();
        SB += static_cast<double>(idx.n_elem) * diff * diff.t();
    }
    return SB;
}

/// @}

/// @name Rotation direction
/// @{

/**
 * @brief Compute the unit rotation direction from a covariate-adjusted contribution matrix.
 *
 * @details
 * **Numeric target:**
 * Centres the target, then computes β = (t_c' t_c)^{-1} t_c' Vx (OLS slope from
 * lm(Vx ~ target)). Mirrors R: `model = lm(Vx ~ target); beta = coefficients[2,]`.
 *
 * **Categorical target:**
 * Computes the between-group scatter matrix S_B and returns its leading
 * eigenvector (the direction of maximum between-group variance).
 * Mirrors R: `sb = compute_SB(Vx, target); r = svd(sb)$v[,1]`.
 *
 * @note Mirrors rENA's `gmr()` direction computation
 *       (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
 *
 * @param[in] Vx          n×q covariate-adjusted contribution matrix.
 * @param[in] target      n×1 target variable (numeric float, or 0-based integer codes).
 * @param[in] categorical `true` if the target is categorical (uses between-group scatter);
 *                        `false` for a numeric target (uses OLS slope).
 * @param[in] n_groups    Number of distinct groups; only used when `categorical` is `true`.
 *
 * @returns Unit vector (p×1) representing the rotation direction.
 *
 * @throws std::runtime_error If `categorical` is `true` and `n_groups < 2`.
 * @throws std::runtime_error If the numeric target has zero variance after centering.
 * @warning Throws `std::runtime_error` if the resulting direction has zero norm,
 *          which can occur when the Lasso-adjusted contribution collapses to zero.
 */
inline arma::vec gmr_direction(
    const arma::mat& Vx,         // n×q covariate-adjusted contribution
    const arma::vec& target,     // n×1 (numeric float, or 0-based int codes)
    bool             categorical,
    arma::uword      n_groups = 0
) {
    arma::vec r;

    if (categorical) {
        if (n_groups < 2)
            throw std::runtime_error(
                "gmr_direction: x_n_groups must be >= 2 for categorical target");
        arma::uvec labels = arma::conv_to<arma::uvec>::from(target);
        arma::mat  SB     = between_group_scatter(Vx, labels, n_groups);

        arma::vec eigval; arma::mat eigvec;
        qe::linalg::eig_sym(eigval, eigvec, SB);  // ascending order
        r = eigvec.col(eigvec.n_cols - 1);         // largest eigenvalue → last col
    } else {
        // Centre target to get the OLS slope (mirrors R's lm() intercept handling)
        arma::vec tc   = target - arma::mean(target);
        double    tTt  = arma::dot(tc, tc);
        if (tTt < 1e-12)
            throw std::runtime_error(
                "gmr_direction: target has zero variance after centering");
        arma::rowvec beta = (tc.t() * Vx) / tTt;  // 1×q slope vector
        r = beta.t();
    }

    double norm = arma::norm(r);
    if (norm < 1e-12)
        throw std::runtime_error("gmr_direction: resulting direction has zero norm");
    return r / norm;
}

/// @}

/**
 * @brief Result of one GMR axis step.
 */
struct GMRStepResult {
    arma::vec direction;   ///< Unit vector in connection space (p×1).
};

/// @name GMR axis step
/// @{

/**
 * @brief Compute the rotation direction for one axis of the Generalized Means Rotation.
 *
 * @details
 * Applies optional row subsetting, performs Lasso-based covariate adjustment
 * (or a simple OLS projection when all columns are target columns), then
 * delegates to `gmr_direction()` to produce a unit direction vector.
 *
 * When `x1_cols.n_elem >= p` (all columns are target columns and there are no
 * penalized covariates), the function falls back to a simple OLS projection:
 * Vx = fitted values from lm(V ~ t).
 *
 * @note Mirrors rENA's `gmr()` step logic
 *       (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
 *
 * @param[in] V          n×q matrix of ENA points (full data; subset applied internally).
 * @param[in] X_model    n×p model matrix (caller-built; target cols + covariate cols).
 * @param[in] target     n×1 target variable (raw float or 0-based integer codes).
 * @param[in] x1_cols    0-based indices of target columns in `X_model` (unpenalized in Lasso).
 * @param[in] categorical `true` if the target is categorical.
 * @param[in] n_groups   Number of distinct groups (only used when `categorical` is `true`).
 * @param[in] subset     Optional 0-based row indices to use; empty means use all rows.
 * @param[in] n_lambda   Length of the Lasso lambda path (forwarded to `lasso_x1_contribution`).
 * @param[in] k_folds    Number of cross-validation folds (forwarded to `lasso_x1_contribution`).
 * @param[in] lasso_eps  lambda_min = lasso_eps * lambda_max (forwarded to `lasso_x1_contribution`).
 *
 * @returns `GMRStepResult` containing the unit direction vector.
 *
 * @throws std::runtime_error Propagated from `gmr_direction()` if the direction
 *         is degenerate or the target has zero variance.
 */
inline GMRStepResult gmr_step(
    const arma::mat&  V,
    const arma::mat&  X_model,
    const arma::vec&  target,
    const arma::uvec& x1_cols,
    bool              categorical,
    arma::uword       n_groups,
    const arma::uvec& subset,
    int               n_lambda  = 50,
    int               k_folds   = 5,
    double            lasso_eps = 0.01
) {
    const arma::uword n = V.n_rows;
    const arma::uword p = X_model.n_cols;

    // ── Select rows ──────────────────────────────────────────────────────────
    arma::uvec rows = subset.is_empty()
                    ? arma::regspace<arma::uvec>(0, n - 1)
                    : subset;

    const arma::mat V_sub  = V.rows(rows);
    const arma::mat X_sub  = X_model.rows(rows);
    const arma::vec t_sub  = target.rows(rows);

    // ── Lasso-adjusted contribution Vx ───────────────────────────────────────
    arma::mat Vx_sub;
    if (x1_cols.n_elem >= p) {
        // All columns are target columns (no penalized covariates):
        // fall back to simple OLS projection (Vx = fitted values from lm(V~t))
        arma::vec tc  = t_sub - arma::mean(t_sub);
        double    tTt = arma::dot(tc, tc);
        if (tTt > 1e-12) {
            arma::rowvec b1 = (tc.t() * V_sub) / tTt;
            Vx_sub = tc * b1;
        } else {
            Vx_sub.zeros(V_sub.n_rows, V_sub.n_cols);
        }
    } else {
        arma::vec pf(p, arma::fill::ones);
        for (arma::uword j : x1_cols) pf(j) = 0.0;

        Vx_sub = lasso_x1_contribution(
            X_sub, V_sub, x1_cols, pf, n_lambda, k_folds, lasso_eps);
    }

    // ── Rotation direction ───────────────────────────────────────────────────
    arma::vec direction = gmr_direction(Vx_sub, t_sub, categorical, n_groups);

    return {direction};
}

/// @}

/**
 * @brief Parameters for `generalized_means_rotation()`.
 *
 * @details
 * Groups all inputs needed to specify both the X and Y rotation axes, along
 * with Lasso tuning knobs. The Y axis is optional: when `has_y` is `false`
 * the second axis is taken from the leading SVD of the x-deflated space.
 */
struct GeneralizedRotationParams {
    /// @name X axis (required)
    /// @{
    arma::mat   x_model_matrix;           ///< n×p model matrix for the X axis.
    arma::vec   x_target;                 ///< n×1 target variable (raw float or 0-based integer codes).
    arma::uvec  x1_cols;                  ///< 0-based target column indices in `x_model_matrix`.
    bool        x_categorical = false;    ///< `true` if the X target is categorical.
    arma::uword x_n_groups    = 0;        ///< Number of distinct groups (used when `x_categorical` is `true`).
    arma::uvec  x_subset;                 ///< Optional row subset (0-based); empty means use all rows.
    /// @}

    /// @name Y axis (optional)
    /// @{
    /// When `has_y` is `false`, the Y axis is taken from the leading SVD of the x-deflated space.
    bool        has_y          = false;   ///< `true` if a Y-axis GMR target is provided.
    arma::mat   y_model_matrix;           ///< n×p model matrix for the Y axis.
    arma::vec   y_target;                 ///< n×1 target variable for the Y axis.
    arma::uvec  y1_cols;                  ///< 0-based target column indices in `y_model_matrix`.
    bool        y_categorical  = false;   ///< `true` if the Y target is categorical.
    arma::uword y_n_groups     = 0;       ///< Number of distinct groups for the Y axis.
    /// The Y axis always uses all rows (no subset parameter).
    /// @}

    /// @name Lasso tuning
    /// @{
    int    n_lambda  = 50;    ///< Length of the lambda path.
    int    k_folds   = 5;     ///< Number of cross-validation folds.
    double lasso_eps = 0.01;  ///< lambda_min = lasso_eps * lambda_max.
    /// @}
};

/// @name Generalized Means Rotation
/// @{

/**
 * @brief Full Generalized Means Rotation (GMR) producing a complete q×q rotation matrix.
 *
 * @details
 * C++/Armadillo implementation of `ena.rotate.by.generalized()`, matching the
 * authoritative R implementation in commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901
 * by Zhiqiang Cai.
 *
 * **Steps performed:**
 *
 *  1. **X axis via GMR** — calls `gmr_step()` on V with the X-axis parameters.
 *  2. **Deflate V** — computes defA = V − V·x·x' for subsequent Y-axis work.
 *  3. **Secondary axis x1** — when the X target is categorical with ≥ 2 groups,
 *     computes x1 = normalize(mean(V[group0]) − mean(V[group1])) using the
 *     *original* V (not defA) and the full-length target (all n rows).
 *     x1 is used only to orthogonalise y_vector; it does not deflate defA.
 *     There is no SVD fallback: x1 stays empty if the condition is not met.
 *  4. **Y axis** — either GMR on defA (when `params.has_y` is `true`) or the
 *     leading SVD direction of defA.
 *  5. **Orthogonalise y_vector** — mirrors R:
 *     `y_vector <- y_vector - (t(y_vector) %*% x_vector) * x_vector`
 *     and, when x1 is available,
 *     `y_vector <- y_vector - (t(y_vector) %*% x1) * x1`,
 *     followed by safe normalisation.
 *  6. **Complete with SVD** — double-deflates V by both x_vector and y_vector,
 *     then calls `complete_rotation()` to fill remaining dimensions.
 *
 * @note Mirrors rENA's `ena.rotate.by.generalized()`
 *       (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
 *
 * @param[in] V       n×q matrix of ENA points (line weights; caller normalises/
 *                    centres upstream).
 * @param[in] params  Fully populated `GeneralizedRotationParams` struct.
 *
 * @returns `RotationResult` with:
 *   - `rotation`     q×q full rotation matrix (column j = axis j).
 *   - `eigenvalues`  q×1 variance per SVD-filled axis (named axes carry 0).
 *   - `column_names` "GMR1", "GMR2" or "SVD2", "SVD3", …, "SVD{q}".
 *
 * @throws std::runtime_error If `gmr_direction()` produces a degenerate direction
 *         for the X or Y axis.
 * @throws std::runtime_error If y_vector has zero norm after orthogonalisation
 *         (can happen when defA is rank-deficient or perfectly aligned with x_vector).
 */
inline RotationResult generalized_means_rotation(
    const arma::mat&                  V,
    const GeneralizedRotationParams&  params
) {
    // ── Step 1: X axis via GMR ────────────────────────────────────────────
    GMRStepResult x_gmr = gmr_step(
        V,
        params.x_model_matrix,
        params.x_target,
        params.x1_cols,
        params.x_categorical,
        params.x_n_groups,
        params.x_subset,
        params.n_lambda,
        params.k_folds,
        params.lasso_eps);

    const arma::vec x_vector = x_gmr.direction;

    // ── Step 2: Deflate V by x_vector ─────────────────────────────────────
    // defA is used only for y_vector computation; x1 uses original V.
    const arma::mat defA = V - V * x_vector * x_vector.t();

    // ── Step 3: Secondary axis x1 ─────────────────────────────────────────
    // Mirrors R: x1 = normalize(mean(V[group0]) - mean(V[group1])).
    //
    // Key points matching the authoritative R:
    //   - Uses original V (not defA)
    //   - Uses the full-length target (all n rows), not just the subset
    //   - Groups are always 0 (first) and 1 (second) in 0-based encoding
    //   - NO fallback: x1 stays empty when categorical/2-group condition fails
    //   - x1 does NOT deflate defA; it is only used to orthogonalise y_vector
    arma::vec x1;

    if (params.x_categorical && params.x_n_groups >= 2) {
        arma::uvec labels = arma::conv_to<arma::uvec>::from(params.x_target);
        arma::uvec g0 = arma::find(labels == 0);
        arma::uvec g1 = arma::find(labels == 1);

        if (!g0.is_empty() && !g1.is_empty()) {
            arma::rowvec m0 = arma::mean(V.rows(g0), 0);   // original V, not defA
            arma::rowvec m1 = arma::mean(V.rows(g1), 0);
            arma::vec diff  = (m0 - m1).t();
            double   dlen   = arma::norm(diff);
            if (dlen > 1e-10) x1 = diff / dlen;
        }
    }

    // ── Step 4: Y axis ────────────────────────────────────────────────────
    // GMR on defA (deflated by x_vector only), or SVD of defA.
    arma::vec y_vector;
    std::string y_name = "SVD2";

    if (params.has_y) {
        GMRStepResult y_gmr = gmr_step(
            defA,
            params.y_model_matrix,
            params.y_target,
            params.y1_cols,
            params.y_categorical,
            params.y_n_groups,
            {},          // no row subset for y axis
            params.n_lambda,
            params.k_folds,
            params.lasso_eps);
        y_vector = y_gmr.direction;
        y_name   = "GMR2";
    } else {
        RotationResult svd_r = ena_svd(defA);
        y_vector = svd_r.rotation.col(0);
    }

    // ── Step 5: Orthogonalise y_vector ────────────────────────────────────
    // Mirrors R (authoritative commit):
    //   y_vector <- y_vector - (t(y_vector) %*% x_vector) * x_vector
    //   if (!is.null(x1)) y_vector <- y_vector - (t(y_vector) %*% x1) * x1
    //   y_vector <- safe_normalize(y_vector)
    y_vector -= arma::dot(y_vector, x_vector) * x_vector;
    if (!x1.is_empty())
        y_vector -= arma::dot(y_vector, x1) * x1;

    double yn = arma::norm(y_vector);
    if (yn < 1e-12)
        throw std::runtime_error(
            "generalized_means_rotation: y_vector has zero norm after orthogonalization");
    y_vector /= yn;

    // ── Step 6: Combine and complete with SVD ─────────────────────────────
    // Double-deflate the original data (mirrors R):
    //   defA <- A - A %*% x %*% t(x) - A %*% y %*% t(y)
    // then pass to prcomp (equivalent: SVD of defA_final).
    arma::mat defA_final = V
        - V * x_vector * x_vector.t()
        - V * y_vector * y_vector.t();

    arma::mat named_axes = arma::join_rows(x_vector, y_vector);
    return complete_rotation(defA_final, named_axes, {"GMR1", y_name});
}

/// @}

}  // namespace qe
#endif  // LIBQE_GENERALIZED_ROTATION_HPP

/**
 * @file lasso.hpp
 * @brief Coordinate-descent Lasso for multivariate response with k-fold CV.
 *
 * Mirrors `glmnet(x, y, family = "mgaussian", penalty.factor = ...)` with
 * k-fold cross-validation to select lambda, and warm-starts along the lambda
 * path for efficiency.
 *
 * The public entry point is qe::lasso_x1_contribution(), which returns an
 * n×q matrix of fitted values attributable to the "target" predictors at the
 * CV-selected lambda.  It is used by `generalized_means_rotation` to isolate
 * the target variable's contribution to the ENA point space after controlling
 * for covariates.
 *
 * **Penalty factors (`pf`):**
 * - `0.0` — predictor is always included (forced in; no shrinkage)
 * - `1.0` — standard L1 penalty
 *
 * The caller is responsible for building the model matrix (main effects and/or
 * interactions); this function accepts a numeric matrix, not a formula.
 */

#ifndef LIBQE_LASSO_HPP
#define LIBQE_LASSO_HPP

#include <armadillo>
#include <cmath>
#include <limits>
#include <stdexcept>

namespace qe {

/**
 * @brief Soft-threshold (shrinkage) operator used by coordinate descent.
 *
 * @details Computes the scalar soft-threshold function:
 *   \f$ S(z,\gamma) = \operatorname{sign}(z)\,\max(|z|-\gamma,\,0) \f$
 *
 * @param[in] z     Input value.
 * @param[in] gamma Threshold amount (must be non-negative for meaningful use).
 * @returns Soft-thresholded value; exactly zero when `|z| <= gamma`.
 */
inline double soft_threshold(double z, double gamma) noexcept {
    return z > gamma ? z - gamma : z < -gamma ? z + gamma : 0.0;
}

/**
 * @brief Single-response Lasso via cyclic coordinate descent at a fixed lambda.
 *
 * @details Fits a Lasso regression for a single response vector using cyclic
 * coordinate descent (naive updates).  The partial-residual update for
 * predictor \f$j\f$ is:
 * \f[
 *   z_j = \frac{X_j^\top r}{n} + \frac{X_j^\top X_j}{n}\,\beta_j
 * \f]
 * \f[
 *   \beta_j \leftarrow \frac{S\!\left(z_j,\;\lambda\,\mathit{pf}_j\right)}{X_j^\top X_j / n}
 * \f]
 * where \f$r = y - X\beta\f$ is the residual maintained incrementally.
 * Predictors with \f$\mathit{pf}_j = 0\f$ skip the soft-threshold and receive
 * the unconstrained OLS update instead.
 *
 * @param[in] X        n×p column-standardized predictor matrix.
 * @param[in] y        n×1 response vector (centering recommended but not required).
 * @param[in] lambda   Regularization strength, on the standardized predictor scale.
 * @param[in] pf       p×1 penalty factors (0 = unpenalized / forced in, 1 = standard L1).
 * @param[in] beta0    Warm-start coefficient vector; pass an empty `arma::vec{}`
 *                     for a cold start from zero.
 * @param[in] max_iter Maximum number of coordinate-descent passes (default 1000).
 * @param[in] tol      Convergence tolerance on the maximum coefficient change
 *                     per pass (default 1e-7).
 * @returns p×1 coefficient vector on the standardized predictor scale.
 */
inline arma::vec lasso_cd(
    const arma::mat& X,
    const arma::vec& y,
    double           lambda,
    const arma::vec& pf,
    arma::vec        beta0   = {},
    int              max_iter = 1000,
    double           tol      = 1e-7
) {
    const arma::uword p = X.n_cols;
    const double      n = static_cast<double>(X.n_rows);

    // Precompute X_j'X_j / n for each predictor
    arma::vec xTx(p);
    for (arma::uword j = 0; j < p; ++j)
        xTx(j) = arma::dot(X.col(j), X.col(j)) / n;

    arma::vec beta = beta0.is_empty()
                   ? arma::vec(p, arma::fill::zeros)
                   : beta0;
    arma::vec r = y - X * beta;   // residual, maintained incrementally

    for (int it = 0; it < max_iter; ++it) {
        double max_delta = 0.0;

        for (arma::uword j = 0; j < p; ++j) {
            if (xTx(j) < 1e-14) continue;   // skip zero-variance predictor

            // Partial correlation statistic
            double z = arma::dot(X.col(j), r) / n + xTx(j) * beta(j);

            double b_new = (pf(j) < 1e-12)
                ? z / xTx(j)                                   // forced in: OLS
                : soft_threshold(z, lambda * pf(j)) / xTx(j); // L1

            double delta = b_new - beta(j);
            if (std::abs(delta) > max_delta) max_delta = std::abs(delta);
            r     -= X.col(j) * delta;
            beta(j) = b_new;
        }

        if (max_delta < tol) break;
    }
    return beta;
}

/**
 * @brief Compute the smallest lambda that drives all penalized coefficients to zero.
 *
 * @details For column-standardized `X` and column-centered `Yc`, lambda_max is:
 * \f[
 *   \lambda_{\max} = \max_{\substack{j:\,\mathit{pf}_j > 0 \\ k}} \frac{|X_j^\top Y_{c,k}|}{n}
 * \f]
 * Above this value every penalized coefficient is exactly zero (unpenalized
 * predictors are unaffected and remain in via OLS regardless of lambda).
 *
 * @param[in] X   n×p column-standardized predictor matrix.
 * @param[in] Yc  n×q column-centered response matrix.
 * @param[in] pf  p×1 penalty factors; predictors with `pf(j) < 1e-12` are skipped.
 * @returns Scalar lambda_max value.
 */
inline double lasso_lambda_max(
    const arma::mat& X,   // column-standardized
    const arma::mat& Yc,  // column-centered responses
    const arma::vec& pf
) {
    const double n = static_cast<double>(X.n_rows);
    double lmax = 0.0;
    for (arma::uword j = 0; j < X.n_cols; ++j) {
        if (pf(j) < 1e-12) continue;   // skip unpenalized predictors
        for (arma::uword k = 0; k < Yc.n_cols; ++k) {
            double v = std::abs(arma::dot(X.col(j), Yc.col(k))) / n;
            if (v > lmax) lmax = v;
        }
    }
    return lmax;
}

/**
 * @brief Cross-validated multivariate Lasso; returns fitted values for target columns only.
 *
 * @details Fits a multivariate Lasso (one independent single-response problem per
 * response column) along a log-spaced lambda path from lambda_max down to
 * `eps * lambda_max`.  Lambda is selected by k-fold cross-validation
 * (mean squared error, averaged over folds and response columns), and the
 * model is refit on the full data at the selected lambda using warm-starts
 * along the path.
 *
 * **Preprocessing performed internally:**
 * - `X_raw` is column-standardized to zero mean and unit variance (zero-variance
 *   columns are left as-is to avoid division by zero).
 * - `Y` is column-centered to absorb the intercept.
 * - Coefficients are unstandardized before the contribution is computed.
 *
 * **Fold assignment:** row \f$i\f$ is assigned to fold \f$i \bmod k\f$
 * (deterministic, order-dependent).
 *
 * **Return value:** The n×q matrix \f$X_{\text{raw},x_1} \hat{B}_{x_1}\f$
 * represents the portion of `Y` (mean-removed) that is linearly explained by
 * the target predictors after penalizing covariate coefficients.  A zero
 * matrix is returned when the target predictors carry no signal at any lambda
 * (e.g., the target is constant after subsetting).  Constant shifts cancel
 * in the downstream between-group scatter (categorical) and slope regression
 * (numeric), so no `ymu` adjustment is applied to the return value.
 *
 * @param[in] X_raw    n×p raw model matrix built by the caller (main effects
 *                     and/or interactions); no formula parsing is done here.
 * @param[in] Y        n×q ENA point matrix (multivariate response).
 * @param[in] x1_cols  0-based column indices in `X_raw` that belong to the
 *                     target variable; these receive `pf = 0` (always included).
 * @param[in] pf       p×1 penalty factors: 0 for `x1_cols`, 1 elsewhere.
 *                     The caller constructs this; non-standard weightings are
 *                     supported.
 * @param[in] n_lambda Length of the lambda path (default 50).
 * @param[in] k_folds  Number of cross-validation folds (default 5; clamped to
 *                     `[2, n/2]`).
 * @param[in] eps      Ratio `lambda_min / lambda_max`; controls how far down
 *                     the path to search (default 0.01).
 * @returns n×q matrix of fitted values attributable to `x1_cols` at the
 *          CV-selected lambda (lambda.min), on the original `Y` scale.
 */
inline arma::mat lasso_x1_contribution(
    const arma::mat& X_raw,
    const arma::mat& Y,
    const arma::uvec& x1_cols,
    const arma::vec&  pf,
    int    n_lambda = 50,
    int    k_folds  = 5,
    double eps      = 0.01
) {
    const arma::uword n = X_raw.n_rows;
    const arma::uword p = X_raw.n_cols;
    const arma::uword q = Y.n_cols;

    // Guard: need at least 2 folds and 2 observations per fold
    k_folds = std::max(2, std::min(k_folds, static_cast<int>(n) / 2));

    // ── Standardize X (μ=0, σ=1); guard zero-variance predictors ────────────
    arma::rowvec xmu  = arma::mean(X_raw, 0);
    arma::rowvec xstd = arma::stddev(X_raw, 0, 0);
    xstd.elem(arma::find(xstd < 1e-12)).ones();   // avoid divide-by-zero
    arma::mat X = (X_raw.each_row() - xmu).each_row() / xstd;

    // ── Center Y (absorbs intercept into the standardized-scale fit) ─────────
    arma::rowvec ymu = arma::mean(Y, 0);
    arma::mat    Yc  = Y.each_row() - ymu;

    // ── Build lambda path: lambda_max → lambda_min on log scale ─────────────
    double lmax = lasso_lambda_max(X, Yc, pf);
    if (lmax < 1e-12)   // target uncorrelated with Y after standardization
        return arma::mat(n, q, arma::fill::zeros);

    arma::vec lambdas = arma::exp(
        arma::linspace(std::log(lmax),
                       std::log(eps * lmax),
                       n_lambda));

    // ── Stratified k-fold CV (fold assignment: row i → fold i % k_folds) ────
    arma::uvec fold_id(n);
    for (arma::uword i = 0; i < n; ++i)
        fold_id(i) = i % static_cast<arma::uword>(k_folds);

    arma::vec cv_err(n_lambda, arma::fill::zeros);

    for (int fold = 0; fold < k_folds; ++fold) {
        arma::uvec tr = arma::find(fold_id != static_cast<arma::uword>(fold));
        arma::uvec va = arma::find(fold_id == static_cast<arma::uword>(fold));

        const arma::mat X_tr = X.rows(tr), X_va = X.rows(va);
        const arma::mat Y_tr = Yc.rows(tr), Y_va = Yc.rows(va);
        const double n_va_q  = static_cast<double>(va.n_elem * q);

        // Warm-start along path for this fold (start cold at lambda_max)
        arma::mat bw(p, q, arma::fill::zeros);
        for (int l = 0; l < n_lambda; ++l) {
            arma::mat b(p, q);
            for (arma::uword k = 0; k < q; ++k)
                b.col(k) = lasso_cd(X_tr, Y_tr.col(k), lambdas(l), pf, bw.col(k));
            bw = b;

            arma::mat err = Y_va - X_va * b;
            cv_err(l) += arma::accu(err % err) / n_va_q;
        }
    }
    cv_err /= static_cast<double>(k_folds);

    // ── Refit on full data at lambda_min, warming along the path ─────────────
    arma::uword best = cv_err.index_min();
    arma::mat bfull(p, q, arma::fill::zeros);
    for (arma::uword l = 0; l <= best; ++l) {
        arma::mat b(p, q);
        for (arma::uword k = 0; k < q; ++k)
            b.col(k) = lasso_cd(X, Yc.col(k), lambdas(l), pf, bfull.col(k));
        bfull = b;
    }

    // ── Unstandardize: beta_orig[j] = beta_std[j] / xstd[j] ─────────────────
    for (arma::uword j = 0; j < p; ++j)
        bfull.row(j) /= xstd(j);

    // ── Return fitted contribution from x1 columns only ──────────────────────
    // This is the portion of Y (mean-removed) linearly explained by the target
    // predictors after covariate adjustment.  Constant shifts cancel in both
    // the between-group scatter (categorical) and slope regression (numeric)
    // that follow, so no ymu adjustment is needed here.
    return X_raw.cols(x1_cols) * bfull.rows(x1_cols);
}

}  // namespace qe
#endif  // LIBQE_LASSO_HPP

/** @file linalg_fallback.hpp
 *  @brief Thin LAPACK-free implementations of SVD, QR, and symmetric
 *         eigendecomposition for use in WASM / no-LAPACK builds.
 *
 *  When @c ARMA_USE_LAPACK is defined these wrappers delegate directly to the
 *  standard Armadillo routines (@c arma::svd, @c arma::qr, @c arma::eig_sym),
 *  so there is zero overhead in normal builds.  When LAPACK is not available
 *  (e.g. Emscripten WASM builds) the wrappers fall back to pure C++
 *  implementations that work for the small matrices typical in ENA
 *  (p ≤ ~100 connection dimensions).
 *
 *  All routines live in namespace @c qe::linalg and operate on @c arma::mat
 *  (double precision).
 *
 *  Fallback algorithms:
 *  - **SVD**      — one-sided Jacobi iterations on \f$A^T\f$ (Demmel & Veselić 1992)
 *  - **QR**       — modified Gram-Schmidt
 *  - **eig_sym**  — cyclic Jacobi (classical Jacobi sweep)
 */

#ifndef LIBQE_LINALG_FALLBACK_HPP
#define LIBQE_LINALG_FALLBACK_HPP

#include <armadillo>
#include <cmath>
#include <limits>

namespace qe {
namespace linalg {

/// @cond INTERNAL
// ── helpers ───────────────────────────────────────────────────────────────────

namespace detail {

/// @brief Modified Gram-Schmidt orthonormalization applied in-place.
///
/// Columns of @p Q are successively orthonormalized against all previously
/// processed columns.  A column whose norm falls below 1e-14 after projection
/// is left unchanged (not zeroed) to avoid introducing NaNs.
///
/// @param Q  Matrix whose columns are orthonormalized in-place (modified).
inline void mgs(arma::mat& Q) {
    const arma::uword p = Q.n_cols;
    for (arma::uword j = 0; j < p; ++j) {
        for (arma::uword i = 0; i < j; ++i)
            Q.col(j) -= arma::dot(Q.col(i), Q.col(j)) * Q.col(i);
        double n = arma::norm(Q.col(j));
        if (n > 1e-14) Q.col(j) /= n;
    }
}

/// @brief One cyclic Jacobi sweep on a p×p symmetric matrix.
///
/// Iterates over all super-diagonal index pairs @c (q,r) and applies a
/// two-sided Jacobi rotation that annihilates @c A(q,r).  The same rotation
/// is accumulated in @p V so that, starting with @c V = I, the full sequence
/// of sweeps yields @c V = eigenvector matrix.
///
/// @param A  Symmetric p×p matrix, updated in-place toward diagonal form.
/// @param V  Rotation accumulator (p×p); initialize to identity to recover
///           eigenvectors.
///
/// @note Algorithm follows the classical cyclic-by-rows Jacobi scheme.
///       Off-diagonal entries no larger than 1e-14 × (|a_qq| + |a_rr|) are
///       skipped for numerical stability.  The `<=` (not `<`) guard correctly
///       handles the all-zero case (a_qq = a_rr = a_qr = 0) that would
///       otherwise produce NaN via 0/0 in the theta computation.
inline void jacobi_sweep(arma::mat& A, arma::mat& V) {
    const arma::uword p = A.n_rows;
    for (arma::uword q = 0; q < p - 1; ++q) {
        for (arma::uword r = q + 1; r < p; ++r) {
            double aqq = A(q, q), arr = A(r, r), aqr = A(q, r);
            if (std::abs(aqr) <= 1e-14 * (std::abs(aqq) + std::abs(arr)))
                continue;
            double theta = (arr - aqq) / (2.0 * aqr);
            double t = (theta >= 0.0)
                ? 1.0 / (theta + std::sqrt(1.0 + theta * theta))
                : 1.0 / (theta - std::sqrt(1.0 + theta * theta));
            double c = 1.0 / std::sqrt(1.0 + t * t); ///< cosine of Jacobi rotation angle
            double s = t * c;                          ///< sine of Jacobi rotation angle
            // Apply Jacobi rotation to A (symmetric update)
            double new_qq = aqq - t * aqr;
            double new_rr = arr + t * aqr;
            A(q, q) = new_qq;  A(r, r) = new_rr;  A(q, r) = A(r, q) = 0.0;
            for (arma::uword k = 0; k < p; ++k) {
                if (k == q || k == r) continue;
                double akq = A(k, q), akr = A(k, r);
                A(k, q) = A(q, k) = c * akq - s * akr;
                A(k, r) = A(r, k) = s * akq + c * akr;
            }
            // Accumulate rotation in V
            for (arma::uword k = 0; k < p; ++k) {
                double vkq = V(k, q), vkr = V(k, r);
                V(k, q) = c * vkq - s * vkr;
                V(k, r) = s * vkq + c * vkr;
            }
        }
    }
}

} // namespace detail

/// @endcond

/// @name Eigendecomposition
/// @{

// ── eig_sym ───────────────────────────────────────────────────────────────────

/// @brief Eigendecomposition of a real symmetric matrix.
///
/// Computes eigenvalues and eigenvectors of the symmetric p×p matrix @p S and
/// returns them sorted in **ascending** eigenvalue order.
///
/// @param[out] eigval  p×1 vector of eigenvalues, ascending.
/// @param[out] eigvec  p×p matrix whose columns are the corresponding
///                     orthonormal eigenvectors.
/// @param[in]  S       Real symmetric p×p input matrix (not modified).
///
/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::eig_sym
///          directly.  Without LAPACK the fallback runs up to @c 50*p cyclic
///          Jacobi sweeps; convergence is guaranteed for well-conditioned
///          matrices but may be slow for p > 200.
///
/// @note Fallback algorithm: classical cyclic-by-rows Jacobi iteration.
///       Convergence is declared when the Frobenius norm of the strictly
///       off-diagonal part drops below 1e-13.
inline void eig_sym(arma::vec& eigval, arma::mat& eigvec, const arma::mat& S) {
#if defined(ARMA_USE_LAPACK)
    arma::eig_sym(eigval, eigvec, S);
#else
    const arma::uword p = S.n_rows;
    arma::mat A = S;                              ///< working copy of S
    eigvec.eye(p, p);                             ///< accumulate rotations; starts as identity

    for (int sweep = 0; sweep < 50 * static_cast<int>(p); ++sweep) {
        // Check off-diagonal convergence
        double off = 0.0;
        for (arma::uword q = 0; q < p - 1; ++q)
            for (arma::uword r = q + 1; r < p; ++r)
                off += A(q, r) * A(q, r);
        if (std::sqrt(off) < 1e-13) break;
        detail::jacobi_sweep(A, eigvec);
    }
    eigval = A.diag();

    // Sort ascending
    arma::uvec idx = arma::sort_index(eigval);
    eigval  = eigval(idx);
    eigvec  = eigvec.cols(idx);
#endif
}

/// @}

/// @name QR Factorization
/// @{

// ── qr_full ───────────────────────────────────────────────────────────────────

/// @brief Full (square) QR factorization of an n×p matrix.
///
/// Produces a square n×n orthogonal factor @p Q and an n×p upper-trapezoidal
/// factor @p R such that @c A = Q*R.  This matches the convention of
/// @c arma::qr(), which returns the full (non-economy) @p Q.
///
/// @param[out] Q  n×n orthogonal matrix.
/// @param[out] R  n×p upper-trapezoidal matrix.
/// @param[in]  A  n×p input matrix (not modified).
///
/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::qr directly.
///          Without LAPACK the fallback constructs the full ONB by seeding
///          columns beyond @c A.n_cols with standard basis vectors and then
///          applying modified Gram-Schmidt; numerical quality degrades for
///          nearly rank-deficient inputs.
///
/// @note The caller (@c orthogonal_svd) depends on @p Q being n×n so that
///       @c Q.cols(k, n-1) spans the null space of @c A^T.
inline void qr_full(arma::mat& Q, arma::mat& R, const arma::mat& A) {
#if defined(ARMA_USE_LAPACK)
    arma::qr(Q, R, A);
#else
    // Build a full n×n orthogonal matrix.
    // Step 1: thin QR via modified Gram-Schmidt (gives first k=A.n_cols columns).
    // Step 2: extend to a full ONB using random vectors + re-orthogonalisation.
    const arma::uword n = A.n_rows, p = A.n_cols;

    // Start with all n columns of A padded with random vectors if n > p.
    arma::mat Qfull(n, n, arma::fill::zeros);
    // Copy A's columns first
    for (arma::uword j = 0; j < p && j < n; ++j)
        Qfull.col(j) = A.col(j);
    // Fill remaining columns with standard basis vectors (or random fallback)
    for (arma::uword j = p; j < n; ++j) {
        Qfull(j, j) = 1.0;   ///< standard basis vector e_j used as seed
    }
    // Full MGS orthonormalization
    detail::mgs(Qfull);
    Q = Qfull;

    // R is n×p (upper trapezoidal); only first p rows are non-zero
    R = Q.t() * A;
    // Zero the strictly lower-triangular part of R[0:p, 0:p]
    for (arma::uword j = 0; j < p; ++j)
        for (arma::uword i = j + 1; i < n; ++i)
            R(i, j) = 0.0;
#endif
}

/// @}

/// @name Singular Value Decomposition
/// @{

// ── svd ───────────────────────────────────────────────────────────────────────

/// @brief Full SVD: @c A ≈ U * diag(s) * V', singular values descending.
///
/// Decomposes the n×p matrix @p A into left singular vectors @p U, singular
/// values @p s, and right singular vectors @p V.
///
/// @param[out] U  n×min(n,p) matrix of left singular vectors.
/// @param[out] s  min(n,p)×1 vector of singular values, descending.
/// @param[out] V  p×p matrix of right singular vectors (all p columns).
/// @param[in]  A  n×p input matrix (not modified).
///
/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::svd directly.
///          Without LAPACK the fallback computes @c A^T*A, calls @c eig_sym,
///          and derives @p U via @c U[:,j] = A*V[:,j]/s_j.  Columns with
///          @c s_j < 1e-14 are zeroed.  Accuracy is governed by the
///          conditioning of @c A^T*A (squares the condition number of @p A).
///
/// @note Fallback algorithm: eigendecomposition of \f$A^T A\f$ via cyclic
///       Jacobi (Demmel & Veselić 1992).  Suitable for the small, moderately
///       conditioned matrices typical in ENA (p ≤ ~100).
inline void svd(arma::mat& U, arma::vec& s, arma::mat& V, const arma::mat& A) {
#if defined(ARMA_USE_LAPACK)
    arma::svd(U, s, V, A);
#else
    // One-sided Jacobi on A^T:
    // We work with B = A^T (p×n). The right singular vectors of A are the
    // left singular vectors of B (= columns of U_B after Jacobi on B^T B).
    // We use the eigendecomposition of A^T * A to get V and s.
    const arma::uword n = A.n_rows, p = A.n_cols;
    const arma::uword k = std::min(n, p);

    arma::mat AtA = A.t() * A;   ///< p×p symmetric Gram matrix
    arma::vec eigval;
    arma::mat eigvec;
    eig_sym(eigval, eigvec, AtA);

    // Eigenvalues from eig_sym are ascending; we want descending.
    arma::uvec idx = arma::sort_index(eigval, "descend");
    eigval = eigval(idx);
    eigvec = eigvec.cols(idx);

    // Singular values (non-negative square roots)
    s.set_size(k);
    for (arma::uword j = 0; j < k; ++j)
        s(j) = std::sqrt(std::max(0.0, eigval(j)));

    // V = eigvec (right singular vectors of A are eigvectors of A^T*A)
    V = eigvec;

    // U = A * V / s (left singular vectors, only first k columns needed)
    U.set_size(n, k);
    for (arma::uword j = 0; j < k; ++j) {
        if (s(j) > 1e-14)
            U.col(j) = A * V.col(j) / s(j);
        else
            U.col(j).zeros();
    }
#endif
}

// ── svd_right ─────────────────────────────────────────────────────────────────

/// @brief Singular values and right singular vectors only (left vectors skipped).
///
/// A lighter-weight variant of @c svd() for callers that only need @p s and
/// @p V.  Under LAPACK the full @c arma::svd is still called internally; the
/// savings come in WASM builds where computing @p U is avoided.
///
/// @param[out] s  min(n,p)×1 vector of singular values, descending.
/// @param[out] V  p×p matrix of right singular vectors.
/// @param[in]  A  n×p input matrix (not modified).
///
/// @warning With @c ARMA_USE_LAPACK defined this still invokes the full
///          @c arma::svd; the left-vector output is discarded.
inline void svd_right(arma::vec& s, arma::mat& V, const arma::mat& A) {
#if defined(ARMA_USE_LAPACK)
    arma::mat U;
    arma::svd(U, s, V, A);
#else
    arma::mat U_unused;
    svd(U_unused, s, V, A);
#endif
}

/// @}

/// @name Linear System Solve
/// @{

// ── solve_spd ─────────────────────────────────────────────────────────────────

/// @brief Solve @c A*x = b for symmetric positive (semi-)definite @p A.
///
/// Returns the n×k solution matrix @p x given the n×n system matrix @p A and
/// the n×k right-hand side @p b.  Near-zero eigenvalues are handled gracefully,
/// making this a pseudo-inverse solve safe for rank-deficient Gram matrices.
///
/// @param[in] A    Real symmetric positive (semi-)definite n×n matrix.
/// @param[in] b    n×k right-hand side matrix.
/// @param[in] tol  Relative threshold below which eigenvalues are treated as
///                 zero.  Eigenvalues with @c |λ| < tol * max(|λ|) are
///                 inverted as zero (i.e. those directions are projected out).
///                 Default: 1e-12.
/// @returns  n×k solution matrix @p x satisfying @c A*x ≈ b.
///
/// @warning With @c ARMA_USE_LAPACK defined this delegates to
///          @c arma::solve(A, b, arma::solve_opts::fast), which uses a
///          Cholesky factorization and does **not** handle rank deficiency.
///          Without LAPACK the eigendecomposition-based pseudo-inverse is used,
///          which is safe for rank-deficient inputs but slower.
///
/// @note Without LAPACK: @c A = V * diag(λ) * V^T is computed via @c eig_sym;
///       the solution is @c x = V * diag(1/λ) * V^T * b with small λ skipped.
inline arma::mat solve_spd(const arma::mat& A, const arma::mat& b,
                            double tol = 1e-12) {
#if defined(ARMA_USE_LAPACK)
    return arma::solve(A, b, arma::solve_opts::fast);
#else
    arma::vec  eigval;
    arma::mat  eigvec;
    eig_sym(eigval, eigvec, A);                     ///< A = eigvec * diag(eigval) * eigvec^T

    const double thresh = tol * std::abs(eigval.max());
    arma::vec inv_eigval(eigval.n_elem, arma::fill::zeros);
    for (arma::uword i = 0; i < eigval.n_elem; ++i)
        if (std::abs(eigval(i)) > thresh)
            inv_eigval(i) = 1.0 / eigval(i);

    // x = eigvec * diag(inv_eigval) * eigvec^T * b
    return eigvec * arma::diagmat(inv_eigval) * eigvec.t() * b;
#endif
}

/// @}

} // namespace linalg
} // namespace qe

#endif // LIBQE_LINALG_FALLBACK_HPP

/**
 * @file modeling.hpp
 * @brief ENA modeling utilities: centering, correlation, confidence intervals,
 *        and node-position solvers for undirected and directed ENA networks.
 */
#ifndef LIBQE_MODELING_HPP
#define LIBQE_MODELING_HPP

#include <armadillo>
#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <vector>
#include <libqe/linalg_fallback.hpp>

namespace qe {

// ---------------------------------------------------------------------------
/// @name Return types
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Aggregated result returned by every node-position solver.
 *
 * All matrices share the same dimensional conventions:
 * rows index units (or nodes), columns index rotated dimensions.
 */
struct NodePositions {
    arma::mat nodes;      ///< Solved node locations, n_codes × n_dims.
    arma::mat centroids;  ///< Per-unit centroids projected onto the node space, n_units × n_dims.
    arma::mat weights;    ///< Normalised half-edge weight per unit per node, n_units × n_codes.
    arma::mat points;     ///< Input rotated points echoed back unchanged, n_units × n_dims.
};

/// @}

// ---------------------------------------------------------------------------
/// @name Centering
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Subtract column means so that every dimension is centred at the origin.
 *
 * @param[in] values  Matrix of ENA unit points (n_units × n_dims).
 * @returns A copy of @p values with each column mean subtracted (centre-to-origin).
 *
 * @note Equivalent to `center_data_c()` in rENA/ena.cpp.
 */
inline arma::mat center_points(arma::mat values) {
    return values.each_row() - arma::mean(values);
}

/// @}

/// @cond INTERNAL

// ---------------------------------------------------------------------------
// Normal quantile (probit) — pure C++, no R dependency
// Rational approximation by Peter Acklam; max abs error < 1.15e-9.
// Used internally by ena_correlation() so we don't need Rcpp::qnorm.
// ---------------------------------------------------------------------------

inline double normal_quantile(double p) {
    static const double a[] = {
        -3.969683028665376e+01,  2.209460984245205e+02,
        -2.759285104469687e+02,  1.383577518672690e+02,
        -3.066479806614716e+01,  2.506628277459239e+00
    };
    static const double b[] = {
        -5.447609879822406e+01,  1.615858368580409e+02,
        -1.556989798598866e+02,  6.680131188771972e+01,
        -1.328068155288572e+01
    };
    static const double c[] = {
        -7.784894002430293e-03, -3.223964580411365e-01,
        -2.400758277161838e+00, -2.549732539343734e+00,
         4.374664141464968e+00,  2.938163982698783e+00
    };
    static const double d[] = {
         7.784695709041462e-03,  3.224671290700398e-01,
         2.445134137142996e+00,  3.754408661907416e+00
    };
    const double p_low  = 0.02425;
    const double p_high = 1.0 - p_low;
    double q, r;
    if (p < p_low) {
        q = std::sqrt(-2.0 * std::log(p));
        return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
               ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1.0);
    } else if (p <= p_high) {
        q = p - 0.5; r = q * q;
        return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
               (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1.0);
    } else {
        q = std::sqrt(-2.0 * std::log(1.0 - p));
        return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
                ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1.0);
    }
}

// ---------------------------------------------------------------------------
// Internal helpers: regularized incomplete beta + t-distribution quantile
// ---------------------------------------------------------------------------
//
// These live in qe::detail so they don't pollute the public qe namespace.
// They are used by mean_ci() and are not part of the public API.

namespace detail {

// Lentz continued-fraction evaluation for the regularized incomplete beta.
// Ported from Numerical Recipes in C, §6.4.
inline double betacf(double a, double b, double x) {
    const int    MAXIT = 200;
    const double EPS   = std::numeric_limits<double>::epsilon();
    const double FPMIN = std::numeric_limits<double>::min() / EPS;
    double qab = a + b, qap = a + 1.0, qam = a - 1.0;
    double c = 1.0;
    double d = 1.0 - qab * x / qap;
    if (std::abs(d) < FPMIN) d = FPMIN;
    d = 1.0 / d;
    double h = d;
    for (int m = 1; m <= MAXIT; ++m) {
        const int m2 = 2 * m;
        // even step
        double aa = static_cast<double>(m) * (b - static_cast<double>(m)) * x
                    / ((qam + m2) * (a + m2));
        d = 1.0 + aa * d; if (std::abs(d) < FPMIN) d = FPMIN;
        c = 1.0 + aa / c; if (std::abs(c) < FPMIN) c = FPMIN;
        d = 1.0 / d; h *= d * c;
        // odd step
        aa = -(a + static_cast<double>(m)) * (qab + static_cast<double>(m)) * x
             / ((a + m2) * (qap + m2));
        d = 1.0 + aa * d; if (std::abs(d) < FPMIN) d = FPMIN;
        c = 1.0 + aa / c; if (std::abs(c) < FPMIN) c = FPMIN;
        d = 1.0 / d;
        const double del = d * c;
        h *= del;
        if (std::abs(del - 1.0) <= EPS) break;
    }
    return h;
}

// Regularized incomplete beta I_x(a, b).
inline double betai(double a, double b, double x) {
    if (x <= 0.0) return 0.0;
    if (x >= 1.0) return 1.0;
    const double lbab = std::lgamma(a + b) - std::lgamma(a) - std::lgamma(b);
    const double bt   = std::exp(lbab + a * std::log(x) + b * std::log(1.0 - x));
    if (x < (a + 1.0) / (a + b + 2.0))
        return bt * betacf(a, b, x) / a;
    return 1.0 - bt * betacf(b, a, 1.0 - x) / b;
}

// Quantile function (inverse CDF) for the t-distribution with `df` degrees
// of freedom.  Uses the relation:
//
//   P(T ≤ t | df)  =  1 - I_x(df/2, 1/2) / 2,   x = df / (df + t^2)
//
// Inverted via Newton–Raphson on I_x with bisection fallback; normal-quantile
// plus one Cornish–Fisher correction term provides the starting guess.
inline double t_quantile(double p, double df) {
    const double INF = std::numeric_limits<double>::infinity();
    if (p <= 0.0) return -INF;
    if (p >= 1.0) return  INF;
    if (p == 0.5) return  0.0;
    if (df <= 0.0) return INF;   // df = 0 → t-distribution undefined → treat as ∞

    // Work with pp > 0.5 for numerical stability; negate at the end if needed.
    const bool   flip = (p < 0.5);
    const double pp   = flip ? 1.0 - p : p;
    const double a    = 0.5 * df;
    const double bv   = 0.5;

    // We need x in (0,1) such that I_x(a, bv) = 2*(1-pp).
    const double target = 2.0 * (1.0 - pp);

    // Initial guess via normal quantile + single Cornish–Fisher correction.
    const double z  = normal_quantile(pp);
    const double t0 = z + (z * z * z + z) / (4.0 * df);
    double x = df / (df + t0 * t0);
    x = std::max(1e-12, std::min(1.0 - 1e-12, x));

    // Newton–Raphson with a bisection bracket.
    // f(x) = I_x(a,bv) - target;  f'(x) = x^(a-1)*(1-x)^(bv-1) / B(a,bv)
    const double lbab = std::lgamma(a + bv) - std::lgamma(a) - std::lgamma(bv);
    double xlo = 0.0, xhi = 1.0;
    for (int iter = 0; iter < 100; ++iter) {
        const double fx = betai(a, bv, x) - target;
        // betai is increasing in x, so fx < 0 → x too small → raise lower bound
        if (fx < 0.0) xlo = x; else xhi = x;

        // Derivative: may underflow near boundaries — fall back to bisection.
        const double log_fpx = lbab + (a - 1.0) * std::log(x)
                                     + (bv - 1.0) * std::log(1.0 - x);
        const double fpx = (log_fpx > -700.0) ? std::exp(log_fpx) : 0.0;

        double x_new;
        if (fpx > 0.0) {
            x_new = x - fx / fpx;
            if (x_new <= xlo || x_new >= xhi)
                x_new = 0.5 * (xlo + xhi);
        } else {
            x_new = 0.5 * (xlo + xhi);
        }

        if (std::abs(x_new - x) < 1e-13 * x) break;
        x = x_new;
    }

    const double t_val = std::sqrt(df * (1.0 - x) / x);
    return flip ? -t_val : t_val;
}

// ---------------------------------------------------------------------------
// IQR helper — matches R's quantile(type = 7) (Hyndman–Fan #7), which is
// identical to numpy's np.percentile(method="linear").
//
// For a sorted n-element vector and probability p:
//   h    = (n - 1) * p
//   lo   = floor(h),  frac = h - lo
//   Q(p) = sorted[lo] * (1 - frac) + sorted[lo+1] * frac
// ---------------------------------------------------------------------------

inline double quantile_type7(const arma::vec& sorted, double p) {
    const int n = static_cast<int>(sorted.n_elem);
    if (n == 0) return std::numeric_limits<double>::quiet_NaN();
    if (n == 1) return sorted[0];
    const double h    = (n - 1) * p;
    const int    lo   = static_cast<int>(std::floor(h));
    const double frac = h - lo;
    if (lo + 1 >= n) return sorted[n - 1];          // guard: p == 1.0
    return sorted[lo] * (1.0 - frac) + sorted[lo + 1] * frac;
}

// Interquartile range for a column vector — uses the same type-7 quantile as R.
inline double iqr(const arma::vec& col) {
    const arma::vec s = arma::sort(col);
    return quantile_type7(s, 0.75) - quantile_type7(s, 0.25);
}

} // namespace detail

/// @endcond

// ---------------------------------------------------------------------------
/// @name Correlation
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Pearson correlation with Fisher-z confidence interval between ENA
 *        unit points and their centroids.
 *
 * All unique pairs of units are formed; for each pair the per-dimension
 * difference vectors are computed, then Pearson r is obtained between the
 * @p points differences and the @p centroids differences.  The confidence
 * interval is derived via Fisher's z-transform using the pure-C++
 * `normal_quantile` function so the function works outside R without
 * `Rcpp::qnorm`.
 *
 * @param[in] points      Rotated ENA unit points, n_units × n_dims.
 * @param[in] centroids   Corresponding centroid coordinates, n_units × n_dims.
 * @param[in] conf_level  Confidence level for the interval (default 0.95).
 * @returns An n_dims × 3 matrix whose columns are [r, ci_lower, ci_upper].
 *
 * @note Equivalent to `ena_correlation()` in rENA/ena.cpp.
 */
inline arma::mat ena_correlation(arma::mat points, arma::mat centroids,
                                  double conf_level = 0.95) {
    int n = points.n_rows;
    int n_pairs = (n * (n - 1)) / 2;

    arma::umat idx1(1, n_pairs), idx2(1, n_pairs);
    int col = 0;
    for (int i = 0; i < n; i++)
        for (int j = i + 1; j < n; j++) { idx1[col] = i; idx2[col] = j; col++; }

    arma::mat pts_diff = points.rows(idx1.row(0))    - points.rows(idx2.row(0));
    arma::mat cts_diff = centroids.rows(idx1.row(0)) - centroids.rows(idx2.row(0));
    arma::mat cr       = arma::cor(pts_diff, cts_diff);

    double qq = normal_quantile((1.0 + conf_level) / 2.0);
    arma::mat out(points.n_cols, 3);
    for (arma::uword i = 0; i < points.n_cols; i++) {
        double r     = cr(i, i);
        double z     = std::atanh(r);
        double sigma = 1.0 / std::sqrt(static_cast<double>(n_pairs) - 3.0);
        out(i, 0) = r;
        out(i, 1) = std::tanh(z - sigma * qq);
        out(i, 2) = std::tanh(z + sigma * qq);
    }
    return out;
}

/// @}

// ---------------------------------------------------------------------------
/// @name Group confidence intervals
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Student-t confidence interval for the mean of a group of ENA unit points.
 *
 * For each dimension d:
 * @code
 *   mean ± t_{α/2, n-1} × (sample_sd / sqrt(n))
 * @endcode
 * where α = 1 − @p conf_level and degrees-of-freedom = n − 1.
 *
 * @param[in] points      ENA unit points for a single group, n_units × n_dims.
 * @param[in] conf_level  Confidence level for the interval (default 0.95).
 * @returns An n_dims × 3 matrix whose columns are [mean, ci_lower, ci_upper].
 *          When n == 0 all entries are NaN.
 *          When n == 1 the CI bounds are ±Inf.
 */
inline arma::mat mean_ci(const arma::mat& points, double conf_level = 0.95) {
    const int n      = static_cast<int>(points.n_rows);
    const int n_dims = static_cast<int>(points.n_cols);

    arma::mat out(n_dims, 3);
    out.fill(std::numeric_limits<double>::quiet_NaN());

    if (n == 0) return out;

    const double df     = static_cast<double>(n - 1);
    const double t_crit = detail::t_quantile(1.0 - (1.0 - conf_level) / 2.0, df);
    const double INF    = std::numeric_limits<double>::infinity();

    for (int d = 0; d < n_dims; ++d) {
        const arma::vec col = points.col(d);
        const double mu = arma::mean(col);
        out(d, 0) = mu;
        if (n == 1) {
            // t_crit = Inf and se = 0 would give NaN; explicitly set ±Inf
            out(d, 1) = -INF;
            out(d, 2) =  INF;
        } else {
            const double se = arma::stddev(col) / std::sqrt(static_cast<double>(n));
            out(d, 1) = mu - t_crit * se;
            out(d, 2) = mu + t_crit * se;
        }
    }
    return out;
}

/**
 * @brief Tukey-fence outlier interval for a group of ENA unit points.
 *
 * For each dimension d the half-width is computed as:
 * @code
 *   half_width[d] = iqr_factor * IQR(points[:, d])
 * @endcode
 * and the interval [−half_width, +half_width] is symmetric around zero,
 * matching rENA's formula:
 * @code
 *   outlier.interval.values = matrix(...) * c(-1, 1)
 * @endcode
 * where the matrix is built from `c(IQR(dim1), IQR(dim2), ...) * iqr_factor`.
 *
 * IQR uses R's default type-7 / Hyndman-Fan #7 quantile, identical to
 * `numpy.percentile(method="linear")`.
 *
 * @param[in] points      ENA unit points for a single group, n_units × n_dims.
 * @param[in] iqr_factor  Multiplier applied to the IQR (default 1.5, the
 *                        standard Tukey fence threshold).
 * @returns An n_dims × 2 matrix whose columns are [lower, upper].
 *          When n == 0 all entries are NaN.
 *          When n == 1, IQR == 0, so lower == upper == 0.
 */
inline arma::mat outlier_ci(const arma::mat& points, double iqr_factor = 1.5) {
    const int n_dims = static_cast<int>(points.n_cols);
    arma::mat out(n_dims, 2, arma::fill::zeros);

    if (points.n_rows == 0) {
        out.fill(std::numeric_limits<double>::quiet_NaN());
        return out;
    }

    for (int d = 0; d < n_dims; ++d) {
        const double half_width = iqr_factor * detail::iqr(points.col(d));
        out(d, 0) = -half_width;
        out(d, 1) =  half_width;
    }
    return out;
}

/// @}

// ---------------------------------------------------------------------------
/// @name Node-position solvers
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Multiobjective least-squares node positions for undirected ENA.
 *
 * Half of each adjacency-vector entry (line weight) is distributed to each of
 * its two endpoint nodes, building a per-unit weight matrix.  Each row is then
 * L1-normalised.  An overdetermined system is solved per dimension:
 * @code
 *   (W^T W) X = W^T T
 * @endcode
 * where W is the normalised weight matrix and T contains the rotated unit
 * points.
 *
 * @param[in] adj_mats  Upper-triangular adjacency vectors stacked row-wise,
 *                      n_units × tri_size.
 * @param[in] t         Rotated unit points, n_units × n_dims.
 * @param[in] num_dims  Number of dimensions to solve for.
 * @returns A @ref NodePositions struct containing `nodes`, `centroids`,
 *          `weights`, and `points`.
 *
 * @note Equivalent to `lws_lsq_positions()` in rENA/ena.cpp.
 */
inline NodePositions node_positions(arma::mat adj_mats, arma::mat t,
                                        int num_dims) {
    if (!adj_mats.is_finite() || !t.is_finite())
        throw std::invalid_argument("node_positions: input matrices must not contain NaN or Inf");
    int tri_size  = adj_mats.n_cols;
    int num_nodes = static_cast<int>(
        std::pow(std::ceil(std::sqrt(static_cast<double>(2 * tri_size))), 2.0)
    ) - (2 * tri_size);
    int row_count = adj_mats.n_rows;

    arma::mat weights(row_count, num_nodes, arma::fill::zeros);
    for (int k = 0; k < row_count; k++) {
        arma::rowvec curr = adj_mats.row(k);
        int z = 0;
        for (int x = 0; x < num_nodes - 1; x++) {
            for (int y = 0; y <= x; y++) {
                weights(k, x + 1) += 0.5 * curr[z];
                weights(k, y)     += 0.5 * curr[z];
                z++;
            }
        }
    }
    for (int k = 0; k < row_count; k++) {
        double len = arma::accu(arma::abs(weights.row(k)));
        if (len < 0.0001) len = 0.0001;
        weights.row(k) /= len;
    }

    arma::mat ssX(num_dims, num_nodes, arma::fill::zeros);
    arma::mat ssA = weights.t() * weights;
    for (int i = 0; i < num_dims; i++) {
        arma::mat ssb = weights.t() * t.col(i);
        ssX.row(i) = qe::linalg::solve_spd(ssA, ssb).t();
    }

    NodePositions r;
    r.nodes     = ssX.t();
    r.centroids = (ssX * weights.t()).t();
    r.weights   = weights;
    r.points    = t;
    return r;
}

/**
 * @brief Least-squares node positions for directed (ordered) ENA.
 *
 * Each row of @p line_weights is an n_nodes × n_nodes directed weight matrix
 * stored in row-major order.  The per-unit node weight for node x accumulates
 * the full row weight plus all column weights directed at x from other nodes.
 * Each row is L1-normalised before solving.
 *
 * When @p combine_pairs is `false` (the standard directed case) the system:
 * @code
 *   (W^T W) X = W^T P
 * @endcode
 * is solved directly with W = normalised weight matrix and P = @p points.
 *
 * When @p combine_pairs is `true`, adjacent row pairs (ground row k and
 * response row k+1) are summed before solving:
 * @code
 *   W_combined[k/2] = W[k] + W[k+1]
 *   P_combined[k/2] = P[k] + P[k+1]
 * @endcode
 * The system is then solved on the combined matrices, but centroids are
 * projected back using the original (un-combined) weight matrix so that
 * every unit retains its own centroid.
 *
 * @param[in] line_weights  Directed adjacency vectors, n_units × (n_nodes²).
 * @param[in] points        Rotated unit points, n_units × n_dims.
 * @param[in] num_dims      Number of dimensions to solve for.
 * @param[in] combine_pairs If `true`, sum adjacent row pairs before solving
 *                          (ground + response model).  Default `false`.
 * @returns A @ref NodePositions struct containing `nodes`, `centroids`,
 *          `weights`, and `points`.
 *
 * @note Equivalent to `directed_node_positions()` in rENA/ena.cpp when
 *       @p combine_pairs is `false`.
 * @note Equivalent to `directed_node_positions_with_ground_response_added()`
 *       in rENA/ena.cpp when @p combine_pairs is `true`.
 *
 * @warning When @p combine_pairs is `true`, @p line_weights must have an even
 *          number of rows (row_count must be even); odd row counts result in
 *          an out-of-bounds access when forming the combined matrices.
 */
inline NodePositions directed_node_positions(arma::mat line_weights,
                                              arma::mat points, int num_dims,
                                              bool combine_pairs = false) {
    if (!line_weights.is_finite() || !points.is_finite())
        throw std::invalid_argument("directed_node_positions: input matrices must not contain NaN or Inf");
    int num_nodes = static_cast<int>(
        std::ceil(std::sqrt(static_cast<double>(line_weights.n_cols)))
    );
    int row_count = line_weights.n_rows;

    arma::mat nw(row_count, num_nodes, arma::fill::zeros);
    for (int k = 0; k < row_count; k++) {
        arma::mat curr = line_weights.row(k);
        int z = 0;
        for (int x = 0; x < num_nodes; x++)
            for (int y = 0; y < num_nodes; y++) {
                nw(k, x) += curr[z];
                if (x != y) nw(k, y) += curr[z];
                z++;
            }
    }
    for (int k = 0; k < row_count; k++) {
        double len = arma::accu(arma::abs(nw.row(k)));
        if (len < 0.0001) len = 0.0001;
        nw.row(k) /= len;
    }

    if (combine_pairs) {
        // Combine paired rows (ground at k, response at k+1)
        arma::mat nw_added(row_count / 2, num_nodes, arma::fill::zeros);
        arma::mat pts_added(row_count / 2, num_dims, arma::fill::zeros);
        for (int k = 0; k < row_count; k += 2) {
            nw_added.row(k / 2)  = nw.row(k)     + nw.row(k + 1);
            pts_added.row(k / 2) = points.row(k) + points.row(k + 1);
        }

        arma::mat ssX(num_dims, num_nodes, arma::fill::zeros);
        arma::mat ssA = nw_added.t() * nw_added;
        for (int i = 0; i < num_dims; i++) {
            arma::mat ssb = nw_added.t() * pts_added.col(i);
            ssX.row(i) = qe::linalg::solve_spd(ssA, ssb).t();
        }

        NodePositions r;
        r.nodes     = ssX.t();
        r.centroids = (ssX * nw.t()).t();
        r.weights   = nw;
        r.points    = points;
        return r;
    }

    arma::mat ssX(num_dims, num_nodes, arma::fill::zeros);
    arma::mat ssA = nw.t() * nw;
    for (int i = 0; i < num_dims; i++) {
        arma::mat ssb = nw.t() * points.col(i);
        ssX.row(i) = qe::linalg::solve_spd(ssA, ssb).t();
    }

    NodePositions r;
    r.nodes     = ssX.t();
    r.centroids = (ssX * nw.t()).t();
    r.weights   = nw;
    r.points    = points;
    return r;
}

/// @}

// ---------------------------------------------------------------------------
/// @name Two-group comparison statistics
/// @{
// ---------------------------------------------------------------------------

/**
 * @brief Per-dimension statistics from a two-group comparison.
 *
 * Matches the output of rENA-api's `group.stats()`:
 *
 * - **Parametric** (Welch two-sample t-test): `t`, `df`, `pvalue_t`,
 *   `cohens_d` (pooled-SD formula), `means`, `sds`.
 * - **Non-parametric** (Wilcoxon rank-sum / Mann-Whitney U):
 *   `U` (= R's W statistic for group 1), `pvalue_u` (normal approximation
 *   with tie correction and continuity correction), `effect_r`
 *   (rank-biserial: `1 − 2*U / (n1*n2)`), `medians`.
 *
 * All per-dimension vectors have length `n_dims`; `means`, `sds`, and
 * `medians` are `2 × n_dims` matrices with row 0 = group 1, row 1 = group 2.
 * Entries are NaN when a statistic is undefined (e.g. n < 2 for parametric
 * tests, or n = 0 for either group).
 */
struct GroupStats {
    int n1;       ///< Sample size of group 1.
    int n2;       ///< Sample size of group 2.

    // Parametric
    arma::vec t;          ///< Welch two-sample t-statistics.
    arma::vec df;         ///< Welch–Satterthwaite degrees of freedom.
    arma::vec pvalue_t;   ///< Two-tailed p-values from the Welch t-test.
    arma::vec cohens_d;   ///< Cohen's d (pooled-SD formula, group1 − group2).
    arma::mat means;      ///< 2 × n_dims: means; row 0 = group1, row 1 = group2.
    arma::mat sds;        ///< 2 × n_dims: sample standard deviations.

    // Non-parametric
    arma::vec U;          ///< Wilcoxon rank-sum U for group 1 (= R's W statistic).
    arma::vec pvalue_u;   ///< Two-tailed p-values (normal approx, tie + continuity correction).
    arma::vec effect_r;   ///< Rank-biserial effect: 1 − 2·U / (n1·n2).
    arma::mat medians;    ///< 2 × n_dims: medians; row 0 = group1, row 1 = group2.
};

/// @cond INTERNAL
namespace detail {

// Two-tailed p-value for a Welch t-statistic.
// Uses the relation P(T≤t|df) = 1 − I_x(df/2, 1/2)/2, x = df/(df+t²).
inline double t_test_pvalue(double t_stat, double df) {
    if (!std::isfinite(t_stat) || df <= 0.0)
        return std::numeric_limits<double>::quiet_NaN();
    const double x = df / (df + t_stat * t_stat);
    return betai(df / 2.0, 0.5, x);   // == 2 * P(T_df > |t_stat|)
}

// Wilcoxon rank-sum U (= R's W for group 1) and two-tailed p-value.
// Normal approximation with tie correction and continuity correction,
// matching R's wilcox.test(..., exact=FALSE, correct=TRUE).
struct WilcoxResult { double U; double p; };

inline WilcoxResult wilcox_ranksum(const arma::vec& g1, const arma::vec& g2) {
    const int    n1  = static_cast<int>(g1.n_elem);
    const int    n2  = static_cast<int>(g2.n_elem);
    const int    n   = n1 + n2;
    const double NaN = std::numeric_limits<double>::quiet_NaN();

    if (n1 == 0 || n2 == 0) return {NaN, NaN};

    // Merge both groups into a single array, tagging each element's origin.
    arma::vec      vals(n);
    std::vector<int> grp(n);
    for (int i = 0; i < n1; ++i) { vals[i]      = g1[i]; grp[i]      = 0; }
    for (int i = 0; i < n2; ++i) { vals[n1 + i] = g2[i]; grp[n1 + i] = 1; }

    arma::uvec ord = arma::sort_index(vals);

    // Assign average ranks for tied values; accumulate tie correction T = Σ(t³−t).
    arma::vec rnk(n);
    double    T_ties = 0.0;
    arma::uword i = 0;
    while (i < static_cast<arma::uword>(n)) {
        arma::uword j = i;
        while (j + 1 < static_cast<arma::uword>(n) &&
               vals[ord[j + 1]] == vals[ord[j]])
            ++j;
        const double avg = 0.5 * (static_cast<double>(i) + static_cast<double>(j)) + 1.0;
        for (arma::uword k = i; k <= j; ++k) rnk[ord[k]] = avg;
        const double t = static_cast<double>(j - i + 1);
        T_ties += t * (t * t - 1.0);   // t³ − t
        i = j + 1;
    }

    // W = rank-sum for group 1; U = W − n1*(n1+1)/2 (R's wilcox.test statistic).
    double W = 0.0;
    for (int k = 0; k < n; ++k)
        if (grp[k] == 0) W += rnk[k];
    const double U = W - static_cast<double>(n1) * (n1 + 1) / 2.0;

    // Normal approximation: Var[U] = n1·n2/12 · ((n+1) − T_ties/(n·(n−1)))
    const double mu_U  = static_cast<double>(n1) * n2 / 2.0;
    const double tie_adj = (n > 1) ? T_ties / (static_cast<double>(n) * (n - 1)) : 0.0;
    double var_U = static_cast<double>(n1) * n2 / 12.0
                   * (static_cast<double>(n + 1) - tie_adj);
    if (var_U < 0.0) var_U = 0.0;

    double p;
    if (var_U == 0.0) {
        p = (U == mu_U) ? 1.0 : 0.0;
    } else {
        // Continuity correction: |U − mu| − 0.5
        double z = (std::abs(U - mu_U) - 0.5) / std::sqrt(var_U);
        if (z < 0.0) z = 0.0;
        p = std::erfc(z / std::sqrt(2.0));   // = 2*(1−Φ(z))
        p = std::min(p, 1.0);
    }

    return {U, p};
}

} // namespace detail (group stats helpers)
/// @endcond

/**
 * @brief Compute per-dimension two-group comparison statistics.
 *
 * For each dimension @p d of the input matrices the function computes:
 *
 * **Parametric (Welch two-sample t-test)**
 *  - t-statistic and Welch–Satterthwaite degrees of freedom
 *  - Two-tailed p-value via the regularised incomplete beta function
 *  - Cohen's d using the pooled-SD formula
 *    (`(μ1−μ2) / √(((n1−1)s1² + (n2−1)s2²)/(n1+n2−2))`)
 *  - Per-group means and sample standard deviations
 *
 * **Non-parametric (Wilcoxon rank-sum / Mann-Whitney U)**
 *  - U statistic for group 1 (identical to R's `wilcox.test` W statistic)
 *  - Two-tailed p-value via the normal approximation with tie correction
 *    and continuity correction (matching R's `wilcox.test(..., correct=TRUE)`)
 *  - Rank-biserial effect size: `1 − 2·U / (n1·n2)`
 *    (rENA-api's `nonparam.effect` formula)
 *  - Per-group medians
 *
 * @param[in] g1  Unit points for group 1, n1 × n_dims.
 * @param[in] g2  Unit points for group 2, n2 × n_dims.
 * @returns A @ref GroupStats struct.
 *
 * @note Parametric statistics are NaN when either group has fewer than 2 units
 *       (no sample variance).  All statistics are NaN when either group is empty.
 * @note The Wilcoxon p-value uses the normal approximation regardless of sample
 *       size; R switches to an exact test for small (n ≤ ~50) tie-free samples,
 *       so p-values may differ slightly in those cases.
 * @note Equivalent to `group.stats()` in rENA-api, moved to the C++ layer so
 *       all language bindings share a single implementation.
 */
inline GroupStats group_stats(const arma::mat& g1, const arma::mat& g2) {
    const int    n1     = static_cast<int>(g1.n_rows);
    const int    n2     = static_cast<int>(g2.n_rows);
    const int    n_dims = static_cast<int>(g1.n_cols);
    const double NaN    = std::numeric_limits<double>::quiet_NaN();

    GroupStats out;
    out.n1 = n1;
    out.n2 = n2;

    out.t        = arma::vec(n_dims); out.t.fill(NaN);
    out.df       = arma::vec(n_dims); out.df.fill(NaN);
    out.pvalue_t = arma::vec(n_dims); out.pvalue_t.fill(NaN);
    out.cohens_d = arma::vec(n_dims); out.cohens_d.fill(NaN);
    out.means    = arma::mat(2, n_dims); out.means.fill(NaN);
    out.sds      = arma::mat(2, n_dims); out.sds.fill(NaN);
    out.U        = arma::vec(n_dims); out.U.fill(NaN);
    out.pvalue_u = arma::vec(n_dims); out.pvalue_u.fill(NaN);
    out.effect_r = arma::vec(n_dims); out.effect_r.fill(NaN);
    out.medians  = arma::mat(2, n_dims); out.medians.fill(NaN);

    if (n1 == 0 || n2 == 0) return out;

    for (int d = 0; d < n_dims; ++d) {
        const arma::vec c1 = g1.col(d);
        const arma::vec c2 = g2.col(d);

        // ── Means and medians ───────────────────────────────────────────────
        const double m1 = arma::mean(c1);
        const double m2 = arma::mean(c2);
        out.means(0, d) = m1;
        out.means(1, d) = m2;
        out.medians(0, d) = arma::median(c1);
        out.medians(1, d) = arma::median(c2);

        // ── Standard deviations (ddof = 1, matching R's sd()) ───────────────
        const double s1 = (n1 > 1) ? arma::stddev(c1) : NaN;
        const double s2 = (n2 > 1) ? arma::stddev(c2) : NaN;
        out.sds(0, d) = s1;
        out.sds(1, d) = s2;

        // ── Welch two-sample t-test ──────────────────────────────────────────
        if (n1 > 1 && n2 > 1 && std::isfinite(s1) && std::isfinite(s2)) {
            const double v1n = s1 * s1 / n1;
            const double v2n = s2 * s2 / n2;
            const double se  = std::sqrt(v1n + v2n);
            if (se > 0.0) {
                const double t_stat = (m1 - m2) / se;
                const double df_val = (v1n + v2n) * (v1n + v2n)
                                    / (v1n * v1n / (n1 - 1) + v2n * v2n / (n2 - 1));
                out.t(d)        = t_stat;
                out.df(d)       = df_val;
                out.pvalue_t(d) = detail::t_test_pvalue(t_stat, df_val);
            }
        }

        // ── Cohen's d (pooled SD) ────────────────────────────────────────────
        if (n1 + n2 > 2 && std::isfinite(s1) && std::isfinite(s2)) {
            const double pooled_var = ((n1 - 1) * s1 * s1 + (n2 - 1) * s2 * s2)
                                    / (n1 + n2 - 2);
            const double pooled_sd  = std::sqrt(pooled_var);
            out.cohens_d(d) = (pooled_sd > 0.0) ? (m1 - m2) / pooled_sd : 0.0;
        }

        // ── Wilcoxon rank-sum ────────────────────────────────────────────────
        const detail::WilcoxResult wr = detail::wilcox_ranksum(c1, c2);
        out.U(d)        = wr.U;
        out.pvalue_u(d) = wr.p;
        if (std::isfinite(wr.U))
            out.effect_r(d) = 1.0 - 2.0 * wr.U / (static_cast<double>(n1) * n2);
    }

    return out;
}

/// @}

} // namespace qe

#endif // LIBQE_MODELING_HPP

/**
 * @file normalization.hpp
 * @brief Row-wise normalization utilities for ENA connection networks.
 */
#ifndef LIBQE_NORMALIZATION_HPP
#define LIBQE_NORMALIZATION_HPP

#include <armadillo>
#include <cmath>

namespace qe {

/**
 * @brief Normalize each row to unit L2 norm (project onto the unit hypersphere).
 *
 * Rows whose L2 norm is zero are left unchanged.
 *
 * @param m Input matrix (n_units × n_connections), row-major.
 * @returns Matrix of the same shape with each row divided by its L2 norm.
 * @note Equivalent to @c fun_sphere_norm() in rENA/ena.cpp.
 */
inline arma::mat normalize_networks(arma::mat m) {
    arma::mat out(m.n_rows, m.n_cols, arma::fill::zeros);
    for (arma::uword r = 0; r < m.n_rows; r++) {
        double len = arma::norm(m.row(r), 2);
        if (std::isfinite(len) && len > 0.0) out.row(r) = m.row(r) / len;
    }
    return out;
}

/**
 * @brief Scale every entry by the reciprocal of the largest row L2 norm.
 *
 * Preserves relative magnitudes across rows; does not normalise each row
 * individually.  If the largest row norm is zero, the matrix is returned
 * unchanged.
 *
 * @param m Input matrix (n_units × n_connections), modified in place.
 * @returns Scaled matrix (same shape as @p m).
 * @note Equivalent to @c fun_skip_sphere_norm() in rENA/ena.cpp.
 */
inline arma::mat scale_networks(arma::mat m) {
    double largest = 0.0;
    for (arma::uword r = 0; r < m.n_rows; r++) {
        double len = arma::norm(m.row(r), 2);
        if (std::isfinite(len)) largest = std::max(largest, len);
    }
    if (largest > 0.0) m /= largest;
    return m;
}

} // namespace qe

#endif // LIBQE_NORMALIZATION_HPP

/**
 * @file rotation.hpp
 * @brief Rotation routines for Epistemic Network Analysis (ENA):
 *        SVD, deflation, orthogonal-SVD, complete (generalized), and
 *        means rotation.  All routines are designed to match the
 *        numerical output of the reference R implementation in rENA.
 */

#ifndef LIBQE_ROTATION_HPP
#define LIBQE_ROTATION_HPP

#include "linalg_fallback.hpp"
#include <armadillo>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace qe {

/// @name Return types
/// @{

/**
 * @brief Aggregated result returned by every rotation routine.
 *
 * The layout mirrors the output of R's `prcomp()`: `rotation` holds the
 * right-singular vectors (principal axes), `eigenvalues` holds the
 * explained-variance values (sdev^2 in R parlance), and `column_names`
 * carries the human-readable axis labels used downstream for network plots
 * and loadings tables.
 */
struct RotationResult {
    arma::mat                rotation;      ///< p x p orthogonal matrix; column j is rotation axis j.
    arma::vec                eigenvalues;   ///< Length-p vector of sdev^2 values, matching rENA's convention.
    std::vector<std::string> column_names;  ///< Axis labels, e.g. "MR1", "SVD2", "GMR1".
};

/**
 * @brief A pair of 0-based row-index vectors identifying two groups within a
 *        points matrix.
 *
 * Used by means_rotation() to specify which rows belong to group A and which
 * belong to group B for each successive mean-difference axis.
 */
struct GroupPair {
    arma::uvec a;  ///< Row indices of group A (0-based).
    arma::uvec b;  ///< Row indices of group B (0-based).
};

/// @}

/// @name SVD rotation
/// @{

/**
 * @brief Compute a full SVD rotation of a (pre-centered) points matrix.
 *
 * Produces the right-singular vectors V as the rotation matrix and scales
 * each singular value to an eigenvalue (sdev^2) compatible with rENA's
 * `prcomp()` output.
 *
 * @param points  n x p data matrix.  The caller is responsible for
 *                centering upstream; this function applies no centering.
 *
 * @returns A RotationResult where:
 *   - `rotation`    = V  (p x p full SVD; trailing columns span the null
 *                         space when the matrix is rank-deficient).
 *   - `eigenvalues[j]` = s[j]^2 / max(1, n - 1)  (== sdev^2 in R).
 *   - `column_names`   = {"SVD1", "SVD2", ..., "SVDp"}.
 *
 * @note Equivalent to `prcomp(points, retx=FALSE, scale=FALSE,
 *       center=FALSE, tol=0)` in rENA/R.
 *
 * @note Sign convention: none.  Signs follow the underlying LAPACK SVD,
 *       matching rENA's long-standing behavior.  A deterministic sign rule
 *       (e.g. svd_flip) may be added later as an opt-in flag.
 */
inline RotationResult ena_svd(const arma::mat& points) {
    if (!points.is_finite())
        throw std::invalid_argument("ena_svd: input matrix must not contain NaN or Inf");
    arma::mat U, V;
    arma::vec s;
    qe::linalg::svd(U, s, V, points);

    const arma::uword p     = points.n_cols;
    const double      denom = points.n_rows > 1
                                  ? static_cast<double>(points.n_rows - 1)
                                  : 1.0;

    arma::vec eigenvalues(p, arma::fill::zeros);
    const arma::uword k = std::min<arma::uword>(s.n_elem, p);
    for (arma::uword j = 0; j < k; ++j) {
        eigenvalues(j) = (s(j) * s(j)) / denom;
    }

    std::vector<std::string> labels(p);
    for (arma::uword j = 0; j < p; ++j) {
        labels[j] = "SVD" + std::to_string(j + 1);
    }
    return {V, eigenvalues, std::move(labels)};
}

/// @}

/// @name Deflation
/// @{

/**
 * @brief Project a data matrix onto the hyperplane orthogonal to a given axis.
 *
 * Computes `data - (data * axis) * axis^T`, effectively removing the
 * component of every row of `data` that lies along `axis`.
 *
 * @param data  n x p data matrix.
 * @param axis  Unit-norm column vector of length p.  The caller is
 *              responsible for normalizing `axis` before calling this
 *              function; no normalization is performed internally.
 *
 * @returns n x p matrix with the projection onto `axis` subtracted out.
 */
inline arma::mat deflate(const arma::mat& data, const arma::vec& axis) {
    return data - (data * axis) * axis.t();
}

/// @}

/// @name Orthogonal SVD rotation
/// @{

/**
 * @brief Orthonormalize named axes via QR, then complete the rotation with
 *        an SVD of the data projected onto the orthogonal complement.
 *
 * The named axes in `weights` are orthonormalized via a full QR
 * decomposition.  The first k columns of the resulting Q become the
 * "fixed" part of the rotation; the remaining p - k columns define a
 * complementary subspace onto which `data` is projected, and a further SVD
 * fills those trailing rotation columns.
 *
 * @param data          n x p data matrix (caller-centered if required).
 * @param weights       p x k matrix of named axis directions.  Column norms
 *                      need not be 1; QR handles normalization internally.
 * @param named_labels  Length-k vector of labels for the first k rotation
 *                      axes.  Trailing axes receive labels "SVD{k+1}" ..
 *                      "SVDp".
 *
 * @returns A RotationResult containing the combined p x p rotation,
 *          eigenvalues for the SVD-filled trailing axes (indices k..p-1;
 *          the first k eigenvalues are zero), and axis labels.
 *
 * @note Equivalent to `orthogonal_svd()` in rENA/R/ena.rotate.by.mean.R:
 * @code
 *   Q     = qr.Q(qr(weights), complete = TRUE)   # p x p
 *   X_bar = data %*% Q[, (k+1):p]                # n x (p - k)
 *   V     = prcomp(X_bar)$rotation
 *   out   = cbind(Q[, 1:k], Q[, (k+1):p] %*% V)
 * @endcode
 *
 * @note The named axes in the OUTPUT are the orthonormalized Q columns, NOT
 *       the original `weights` columns.  Use complete_rotation() if you need
 *       to preserve input axes verbatim.
 *
 * @throws std::runtime_error if `data.n_cols != weights.n_rows` or
 *         `named_labels.size() != weights.n_cols`.
 */
inline RotationResult orthogonal_svd(
    const arma::mat&                data,
    const arma::mat&                weights,
    const std::vector<std::string>& named_labels) {
    const arma::uword p = weights.n_rows;
    const arma::uword k = weights.n_cols;
    if (data.n_cols != p) {
        throw std::runtime_error(
            "orthogonal_svd: data.n_cols must equal weights.n_rows");
    }
    if (named_labels.size() != k) {
        throw std::runtime_error(
            "orthogonal_svd: named_labels.size() must equal weights.n_cols");
    }
    if (k == 0) {
        return ena_svd(data);
    }

    arma::mat Q, R;
    qe::linalg::qr_full(Q, R, weights);                 // Q is p x p

    arma::mat rotation(p, p);
    arma::vec eigenvalues(p, arma::fill::zeros);
    rotation.cols(0, k - 1) = Q.cols(0, k - 1);

    if (k < p) {
        arma::mat Q_comp = Q.cols(k, p - 1);            // p x (p - k)
        arma::mat X_bar  = data * Q_comp;               // n x (p - k)
        RotationResult inner = ena_svd(X_bar);
        rotation.cols(k, p - 1) = Q_comp * inner.rotation;
        for (arma::uword j = 0; j < (p - k); ++j) {
            eigenvalues(k + j) = inner.eigenvalues(j);
        }
    }

    std::vector<std::string> labels(p);
    for (arma::uword j = 0; j < k; ++j) labels[j] = named_labels[j];
    for (arma::uword j = k; j < p; ++j) {
        labels[j] = "SVD" + std::to_string(j + 1);
    }
    return {rotation, eigenvalues, std::move(labels)};
}

/// @}

/// @name Complete (generalized) rotation
/// @{

/**
 * @brief Keep named axes verbatim and fill remaining axes from an SVD of the
 *        data after parallel deflation by all named axes.
 *
 * Unlike orthogonal_svd(), this routine preserves the caller-supplied
 * `named_axes` exactly (no QR re-orthonormalization).  The complementary
 * axes are obtained by deflating `data` simultaneously by all named axes and
 * then running ena_svd() on the result.
 *
 * The deflation is **parallel**: every projection is subtracted from the
 * original `data`, not from a progressively deflated copy:
 * @code
 *   defA = data - data * (named_axes * named_axes^T)
 * @endcode
 * This matches `defA <- A - A %*% v1 %*% t(v1) - A %*% v2 %*% t(v2)` in
 * rENA line-for-line.  For mutually orthogonal axes the result is identical
 * to sequential deflation; for non-orthogonal axes it differs.
 *
 * @param data          n x p data matrix.
 * @param named_axes    p x k matrix of named rotation axes.  Each column
 *                      must be unit-norm; orthonormality between columns is
 *                      NOT required.
 * @param named_labels  Length-k vector of labels for the first k axes.
 *                      Trailing axes receive labels "SVD{k+1}" .. "SVDp".
 *                      Conventional labels for generalized rotation are
 *                      "GMR1", "GMR2", then "SVD{k+1}" onward, but labels
 *                      are entirely caller-supplied.
 *
 * @returns A RotationResult where columns 0..k-1 of `rotation` equal
 *          `named_axes` verbatim and columns k..p-1 come from the SVD of
 *          the deflated data.
 *
 * @note Equivalent to the tail of `ena.rotate.by.generalized` in rENA
 *       (canonical version: commit 2c079126 on rENA `origin/main`).
 *
 * @throws std::runtime_error if `data.n_cols != named_axes.n_rows` or
 *         `named_labels.size() != named_axes.n_cols`.
 */
inline RotationResult complete_rotation(
    const arma::mat&                data,
    const arma::mat&                named_axes,
    const std::vector<std::string>& named_labels) {
    const arma::uword p = named_axes.n_rows;
    const arma::uword k = named_axes.n_cols;
    if (data.n_cols != p) {
        throw std::runtime_error(
            "complete_rotation: data.n_cols must equal named_axes.n_rows");
    }
    if (named_labels.size() != k) {
        throw std::runtime_error(
            "complete_rotation: named_labels.size() must equal named_axes.n_cols");
    }
    if (k == 0) {
        return ena_svd(data);
    }

    arma::mat defA = data - data * named_axes * named_axes.t();
    RotationResult inner = ena_svd(defA);

    arma::mat rotation(p, p);
    arma::vec eigenvalues(p, arma::fill::zeros);
    rotation.cols(0, k - 1) = named_axes;
    if (k < p) {
        rotation.cols(k, p - 1) = inner.rotation.cols(0, p - k - 1);
        for (arma::uword j = 0; j < (p - k); ++j) {
            eigenvalues(k + j) = inner.eigenvalues(j);
        }
    }

    std::vector<std::string> labels(p);
    for (arma::uword j = 0; j < k; ++j) labels[j] = named_labels[j];
    for (arma::uword j = k; j < p; ++j) {
        labels[j] = "SVD" + std::to_string(j + 1);
    }
    return {rotation, eigenvalues, std::move(labels)};
}

/// @}

/// @name Means rotation
/// @{

/**
 * @brief Compute a means rotation from one or more group pairs.
 *
 * For each GroupPair, computes the normalized mean-difference vector on the
 * progressively deflated data and stacks the resulting unit-norm axes into a
 * weights matrix.  The final rotation is completed by orthogonal_svd().
 * The input `points` is mean-centered before any axes are computed, matching
 * rENA's `scale(data, scale=FALSE, center=TRUE)`.
 *
 * @param points  n x p matrix of network positions (raw, not pre-centered).
 * @param pairs   Ordered list of GroupPair objects.  Each pair contributes
 *                one "MR" axis in sequence.  Must be non-empty.
 *
 * @returns A RotationResult with `column_names` = {"MR1", ..., "MRm",
 *          "SVD{m+1}", ..., "SVDp"} where m = pairs.size().
 *
 * @note Equivalent to `ena.rotate.by.mean()` in rENA/R/ena.rotate.by.mean.R.
 *       The progressive deflation (each axis is computed on the data after
 *       removing all previous axes) is the key numerical step.
 *
 * @warning There is NO guard against a zero-norm mean-difference vector.
 *          If the two group means are identical on the current deflated data,
 *          the axis will contain NaN values (division by zero).  This mirrors
 *          a latent bug in rENA and is preserved verbatim for numerical
 *          fidelity; a safe opt-in guard will be added in a future release.
 *
 * @throws std::runtime_error if `pairs` is empty.
 */
inline RotationResult means_rotation(const arma::mat&              points,
                                      const std::vector<GroupPair>& pairs) {
    if (pairs.empty()) {
        throw std::runtime_error("Unable to rotate without 2 groups.");
    }

    arma::mat data     = points.each_row() - arma::mean(points, 0);
    arma::mat deflated = data;
    arma::mat weights(data.n_cols, pairs.size(), arma::fill::zeros);
    std::vector<std::string> labels;
    labels.reserve(pairs.size());

    for (std::size_t i = 0; i < pairs.size(); ++i) {
        const arma::rowvec mean_a = arma::mean(deflated.rows(pairs[i].a), 0);
        const arma::rowvec mean_b = arma::mean(deflated.rows(pairs[i].b), 0);
        const arma::vec    diff   = (mean_a - mean_b).t();
        const double       norm   = std::sqrt(arma::accu(diff % diff));
        const arma::vec    axis   = diff / norm;        // no zero-norm guard

        deflated        = deflate(deflated, axis);
        weights.col(i)  = axis;
        labels.push_back("MR" + std::to_string(i + 1));
    }

    return orthogonal_svd(deflated, weights, labels);
}

/// @}

}  // namespace qe

#endif  // LIBQE_ROTATION_HPP

// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
#include <libqe/libqe.hpp>

using namespace Rcpp;

// =============================================================================
// Adjacency utilities
// =============================================================================

//' Number of upper-triangle pairs for n codes
//' @param n Number of codes
//' @return Integer: n*(n-1)/2
//' @export
// [[Rcpp::export]]
int choose_two(int n) {
    return qe::choose_two(n);
}

//' Upper-triangle index pairs
//' @param len Number of codes (side length of square matrix)
//' @param row -1 = both rows, 0 = row indices only, 1 = col indices only
//' @export
// [[Rcpp::export]]
arma::umat connection_indices(int len, int row = -1) {
    return qe::connection_indices(len, row);
}

//' Pairwise products → upper-triangle vector
//' @param v Numeric vector of code values
//' @export
// [[Rcpp::export]]
arma::rowvec code_connections(arma::mat v) {
    return qe::code_connections(v);
}

//' Fold a directed (n*n) vector into an undirected upper-triangle vector
//' @param v Numeric vector of length n*n
//' @export
// [[Rcpp::export]]
arma::rowvec fold_directed_network(arma::vec v) {
    return qe::fold_directed_network(v);
}

//' Flatten an adjacency matrix to a connection vector
//' @param x Numeric matrix
//' @param full TRUE = full n*n (directed); FALSE = upper triangle (undirected)
//' @export
// [[Rcpp::export]]
arma::rowvec network_to_vector(arma::mat x, bool full = true) {
    return qe::network_to_vector(x, full);
}

//' Code-name pairs for upper-triangle positions ("A & B")
//' @param v Character vector of code names
//' @export
// [[Rcpp::export]]
std::vector<std::string> connection_names(std::vector<std::string> v) {
    return qe::connection_names(v);
}

// =============================================================================
// Normalization
// =============================================================================

//' Row-wise L2 (sphere) normalization
//' @param m Numeric matrix
//' @export
// [[Rcpp::export]]
arma::mat normalize_networks(arma::mat m) {
    return qe::normalize_networks(m);
}

//' Max-norm scaling (divide all rows by the largest row L2 norm)
//' @param m Numeric matrix
//' @export
// [[Rcpp::export]]
arma::mat scale_networks(arma::mat m) {
    return qe::scale_networks(m);
}

// =============================================================================
// Modeling
// =============================================================================

//' Center points (subtract column means)
//' @param values Numeric matrix
//' @export
// [[Rcpp::export]]
arma::mat center_points(arma::mat values) {
    return qe::center_points(values);
}

//' Confidence interval for the mean of a group of ENA unit points
//'
//' For each dimension computes a t-based CI:
//' \code{mean ± t(α/2, n-1) × (SD / sqrt(n))} where \code{α = 1 - conf_level}.
//'
//' @param points     Numeric matrix (units x dims) — one row per unit in the group
//' @param conf_level Confidence level (default 0.95)
//' @return Numeric matrix (n_dims x 3): columns are [mean, ci_lower, ci_upper].
//'   When \code{nrow(points) == 1} the CI bounds are \code{±Inf}.
//' @export
// [[Rcpp::export]]
arma::mat mean_ci(arma::mat points, double conf_level = 0.95) {
    return qe::mean_ci(points, conf_level);
}

//' Outlier interval based on IQR (Tukey fence) for a group of ENA unit points
//'
//' For each dimension: \code{lower = -iqr_factor * IQR}, \code{upper = +iqr_factor * IQR}.
//' Equivalent to rENA's:
//' \preformatted{
//'   oi <- c(IQR(pts[,1]), ...) * iqr_factor
//'   matrix(rep(oi, 2), ncol = n_dims, byrow = TRUE) * c(-1, 1)
//' }
//' IQR uses R's default \code{type = 7} quantile.
//'
//' @param points     Numeric matrix (units x dims) — one row per unit in the group
//' @param iqr_factor Multiplier applied to the IQR (default 1.5, the Tukey fence)
//' @return Numeric matrix (n_dims x 2): columns are [lower, upper].
//' @export
// [[Rcpp::export]]
arma::mat outlier_ci(arma::mat points, double iqr_factor = 1.5) {
    return qe::outlier_ci(points, iqr_factor);
}

//' Two-group comparison statistics
//'
//' Computes per-dimension parametric and non-parametric statistics for two
//' groups of ENA unit points.  Equivalent to \code{group.stats()} in rENA-api,
//' moved to the C++ layer so all language bindings share a single implementation.
//'
//' \strong{Parametric (Welch two-sample t-test):}
//' \itemize{
//'   \item \code{t}        — Welch t-statistics
//'   \item \code{parameter}— Welch–Satterthwaite degrees of freedom
//'   \item \code{pvalue}   — two-tailed p-values
//'   \item \code{effect}   — Cohen's d (pooled-SD: \code{(μ1−μ2)/pooled_sd})
//'   \item \code{mean}     — 2×n_dims matrix, row 1 = group1, row 2 = group2
//'   \item \code{std.dev}  — 2×n_dims matrix of sample standard deviations
//' }
//'
//' \strong{Non-parametric (Wilcoxon rank-sum):}
//' \itemize{
//'   \item \code{U}        — rank-sum U for group 1 (= R's \code{wilcox.test} W)
//'   \item \code{pvalue}   — two-tailed p-values (normal approx, tie + continuity correction)
//'   \item \code{effect}   — rank-biserial: \code{1 − 2·U / (n1·n2)}
//'   \item \code{median}   — 2×n_dims matrix of medians
//' }
//'
//' Entries are \code{NA} when a statistic is undefined (e.g. n < 2 for
//' parametric tests).
//'
//' @param g1 Numeric matrix (n1 × n_dims) — unit points for group 1
//' @param g2 Numeric matrix (n2 × n_dims) — unit points for group 2
//' @return Named list with \code{N}, \code{parametric}, and \code{nonparametric}
//'   sub-lists, matching the structure of rENA-api's \code{group.stats()}.
//' @export
// [[Rcpp::export]]
List group_stats(arma::mat g1, arma::mat g2) {
    qe::GroupStats s = qe::group_stats(g1, g2);
    return List::create(
        _("N") = IntegerVector::create(s.n1, s.n2),
        _("parametric") = List::create(
            _("t")         = s.t,
            _("parameter") = s.df,
            _("pvalue")    = s.pvalue_t,
            _("effect")    = s.cohens_d,
            _("mean")      = s.means,
            _("std.dev")   = s.sds
        ),
        _("nonparametric") = List::create(
            _("U")      = s.U,
            _("pvalue") = s.pvalue_u,
            _("effect") = s.effect_r,
            _("median") = s.medians
        )
    );
}

//' Pearson correlation with CI between ENA points and centroids
//' @param points  Numeric matrix (units x dims)
//' @param centroids Numeric matrix (units x dims)
//' @param conf_level Confidence level (default 0.95)
//' @export
// [[Rcpp::export]]
arma::mat ena_correlation(arma::mat points, arma::mat centroids,
                              double conf_level = 0.95) {
    return qe::ena_correlation(points, centroids, conf_level);
}

//' Least-squares node positions for undirected ENA
//' @param adj_mats Numeric matrix of line weights (units x connections)
//' @param t        Numeric matrix of rotated points (units x dims)
//' @param num_dims Number of dimensions
//' @return List with nodes, centroids, weights, points
//' @export
// [[Rcpp::export]]
List node_positions(arma::mat adj_mats, arma::mat t, int num_dims) {
    if (!adj_mats.is_finite() || !t.is_finite())
        stop("node_positions: input matrices must not contain NaN or Inf - "
             "filter or impute rows with non-finite values before calling");
    qe::NodePositions r = qe::node_positions(adj_mats, t, num_dims);
    return List::create(
        _("nodes")     = r.nodes,
        _("centroids") = r.centroids,
        _("weights")   = r.weights,
        _("points")    = r.points
    );
}

//' Least-squares node positions for directed ENA
//' @param line_weights Numeric matrix (units x connections)
//' @param points       Numeric matrix of rotated points (units x dims)
//' @param num_dims     Number of dimensions
//' @return List with nodes, centroids, weights, points
//' @export
// [[Rcpp::export]]
List directed_node_positions(arma::mat line_weights, arma::mat points,
                                 int num_dims) {
    if (!line_weights.is_finite() || !points.is_finite())
        stop("directed_node_positions: input matrices must not contain NaN or Inf - "
             "filter or impute rows with non-finite values before calling");
    qe::NodePositions r = qe::directed_node_positions(line_weights, points, num_dims);
    return List::create(
        _("nodes")     = r.nodes,
        _("centroids") = r.centroids,
        _("weights")   = r.weights,
        _("points")    = r.points
    );
}

//' Directed node positions with paired ground+response rows combined
//' @param line_weights Numeric matrix (units x connections)
//' @param points       Numeric matrix of rotated points (units x dims)
//' @param num_dims     Number of dimensions
//' @return List with nodes, centroids, weights, points
//' @export
// [[Rcpp::export]]
List directed_node_positions_combine_pairs(arma::mat line_weights,
                                               arma::mat points,
                                               int num_dims) {
    if (!line_weights.is_finite() || !points.is_finite())
        stop("directed_node_positions_combine_pairs: input matrices must not contain NaN or Inf - "
             "filter or impute rows with non-finite values before calling");
    qe::NodePositions r = qe::directed_node_positions(
        line_weights, points, num_dims, true);
    return List::create(
        _("nodes")     = r.nodes,
        _("centroids") = r.centroids,
        _("weights")   = r.weights,
        _("points")    = r.points
    );
}

// =============================================================================
// Accumulation
// =============================================================================

//' Core connection matrix for one ground+response pair
//' @param ground          Numeric row vector of ground (context) code values
//' @param response        Numeric row vector of response code values
//' @param response_weight Scalar weight applied to the response self-connection
//' @param ordered         TRUE = directed; FALSE = undirected
//' @export
// [[Rcpp::export]]
arma::mat connection_matrix(arma::rowvec ground, arma::rowvec response,
                                         double response_weight = 1.0,
                                         bool ordered = true) {
    return qe::connection_matrix(ground, response, response_weight, ordered);
}

//' Traditional stanza-window accumulation (rENA model)
//'
//' For each row k in a single conversation's code matrix, accumulates
//' connections over a back/forward window.
//'
//' \strong{Undirected} (\code{ordered = FALSE}, default): upper-triangle
//' co-occurrences with back/forward-reference corrections.
//' Returns \code{n_rows × choose_two(n_codes)}.
//'
//' \strong{Directed} (\code{ordered = TRUE}): focal row k as response,
//' sum of prior window rows as ground, using the directed connection formula.
//' \code{window_forward} is ignored. Returns \code{n_rows × n_codes²}.
//'
//' @param codes          Numeric matrix (rows = lines, cols = codes) for ONE conversation
//' @param window_back    Number of prior lines in window (default 1); use .Machine$integer.max for Inf
//' @param window_forward Number of subsequent lines (default 0; ignored when ordered = TRUE)
//' @param binary         If TRUE, binarise non-zero connection counts
//' @param ordered        If TRUE, return directed n² output; FALSE (default) returns upper-tri
//' @return Undirected: (n_rows × choose_two(n_codes)); directed: (n_rows × n_codes²)
//' @export
// [[Rcpp::export]]
arma::mat accumulate_stanza(arma::mat codes,
                            int window_back    = 1,
                            int window_forward = 0,
                            bool binary        = true,
                            bool ordered       = false) {
    return qe::accumulate_stanza(codes, window_back, window_forward, binary, ordered);
}

//' Compute a column-major linear index into a multi-dimensional array
//'
//' Equivalent to tma's flat_index(). Throws if lengths of `indices`
//' and `dims` differ.
//'
//' @param indices 0-based integer vector of per-dimension indices
//' @param dims    Integer vector of array dimensions
//' @return Scalar integer linear index
//' @export
// [[Rcpp::export]]
int flat_index(std::vector<int> indices, std::vector<int> dims) {
    return qe::flat_index(indices, dims);
}

//' Per-row upper-triangle co-occurrence matrix
//'
//' For each row in \code{codes}, computes the pairwise code products
//' (upper-triangle) and optionally binarizes. Output has
//' \code{choose(n_codes, 2)} columns.
//'
//' @param codes  Numeric matrix (rows = observations, cols = codes)
//' @param binary If TRUE, binarise non-zero co-occurrences (default TRUE)
//' @return Numeric matrix (n_rows x choose_two(n_codes))
//' @export
// [[Rcpp::export]]
arma::mat row_connections(arma::mat codes, bool binary = true) {
    return qe::row_connections(codes, binary);
}

//' Rolling backward window sum of a code matrix
//'
//' For each row k, sums the raw code values over rows
//' \code{[max(0, k - window_size + 1), k]}. Returns a matrix of the same
//' shape as \code{codes}. \code{window_size <= 0} is treated as 1
//' (current row only).
//'
//' @param codes       Numeric matrix (rows = observations, cols = codes)
//' @param window_size Number of rows to look back (default 1)
//' @return Numeric matrix (same dimensions as \code{codes})
//' @export
// [[Rcpp::export]]
arma::mat rolling_window_sum(arma::mat codes, int window_size = 1) {
    return qe::rolling_window_sum(codes, window_size);
}

//' Ground/response accumulation for one unit (tma model)
//'
//' @param codes      Numeric matrix for the full context (n_rows x n_codes)
//' @param unit_rows  0-based integer vector of rows belonging to this unit
//' @param decay_fn   R function mapping a numeric distance vector to weights
//' @param ordered    TRUE = directed; FALSE = undirected (upper-tri)
//' @return Numeric vector of connection counts
//' @export
// [[Rcpp::export]]
arma::rowvec accumulate_unit(arma::mat codes, std::vector<int> unit_rows,
                                  Function decay_fn, bool ordered = false) {
    auto cpp_decay = [&](arma::vec distances) -> arma::vec {
        NumericVector d = wrap(distances);
        NumericVector w = decay_fn(d);
        return as<arma::vec>(w);
    };
    return qe::accumulate_unit(codes, unit_rows, cpp_decay, ordered);
}

//' Ground/response accumulation for one unit — returns unit vector and per-row matrix
//'
//' Equivalent to tma's accumulate_network() but without R-env-var setup.
//' The decay function receives a distance vector (response=0, older rows
//' have larger values) and returns a weight vector of the same length.
//'
//' @param codes      Numeric matrix (n_rows x n_codes)
//' @param unit_rows  0-based integer vector of rows belonging to this unit
//' @param decay_fn   R function(distances) -> weights
//' @param ordered    TRUE = directed (full p^2); FALSE = undirected (upper-tri)
//' @return List with `networks` (vector) and `row_networks` (matrix)
//' @export
// [[Rcpp::export]]
List accumulate_unit_with_rows(arma::mat codes, std::vector<int> unit_rows,
                                   Function decay_fn, bool ordered = false) {
    auto cpp_decay = [&](int unit_row, arma::uvec ground_indices) -> arma::vec {
        arma::vec dists(ground_indices.n_elem);
        for (arma::uword k = 0; k < ground_indices.n_elem; ++k)
            dists[k] = static_cast<double>(unit_row - ground_indices[k]);
        return as<arma::vec>(wrap(decay_fn(wrap(dists))));
    };
    qe::UnitNetworks r = qe::accumulate_unit_with_rows(codes, unit_rows, cpp_decay, ordered);
    return List::create(
        _("networks")     = r.networks,
        _("row_networks") = r.row_networks
    );
}

//' Tensor-based multi-modal accumulation for one unit (tma model)
//'
//' Pure-C++ port of tma's apply_tensor().  Accepts 0-based index vectors
//' for sender/receiver/mode dimensions and a pre-converted integer context
//' lookup matrix.
//'
//' @param tensor         Numeric vector (column-major flat tensor)
//' @param dims           Integer vector of tensor dimensions
//' @param dims_sender    0-based sender axis indices
//' @param dims_receiver  0-based receiver axis indices
//' @param dims_mode      0-based mode axis indices
//' @param context_lookup Integer matrix (n_context_rows x n_factors), 0-based
//' @param unit_rows      0-based response-row indices for this unit
//' @param codes          Numeric matrix (n_context_rows x n_codes)
//' @param times          Numeric vector of timestamps per context row
//' @param ordered        TRUE = directed; FALSE = undirected
//' @return List with `connection_counts` (vector) and `row_connection_counts` (matrix)
//' @export
// [[Rcpp::export]]
List apply_tensor(arma::vec tensor,
                     std::vector<int> dims,
                     std::vector<int> dims_sender,
                     std::vector<int> dims_receiver,
                     std::vector<int> dims_mode,
                     arma::imat context_lookup,
                     std::vector<int> unit_rows,
                     arma::mat codes,
                     arma::vec times,
                     bool ordered = true) {
    qe::TensorNetworks r = qe::apply_tensor_unit(
        tensor, dims, dims_sender, dims_receiver, dims_mode,
        context_lookup, unit_rows, codes, times, ordered);
    return List::create(
        _("connection_counts")     = r.connection_counts,
        _("row_connection_counts") = r.row_connection_counts
    );
}

// =============================================================================
// Rotation
// =============================================================================

// Pack a RotationResult into the list shape that matches rENA's ENARotationSet
// payload (rotation + eigenvalues + column names). Column names are attached
// to the rotation matrix as dimnames so downstream R code can index by them.
// Eigenvalues are returned as a plain numeric vector (not an Nx1 matrix) to
// match what rENA's `pcaResults$sdev^2` produces.
static List pack_rotation_result(const qe::RotationResult& r) {
    NumericMatrix rotation = wrap(r.rotation);
    CharacterVector col_names(r.column_names.begin(), r.column_names.end());
    rotation.attr("dimnames") = List::create(R_NilValue, col_names);
    NumericVector eigenvalues(r.eigenvalues.begin(), r.eigenvalues.end());
    return List::create(
        _("rotation")     = rotation,
        _("eigenvalues")  = eigenvalues,
        _("column_names") = col_names
    );
}

//' SVD rotation (matches prcomp(retx=F, scale=F, center=F, tol=0))
//'
//' Caller is responsible for centering upstream. Eigenvalues are stored as
//' \code{sdev^2} (variance) to match rENA's \code{ena.svd}.
//'
//' @param points Numeric matrix (n_units x n_dims)
//' @return List with \code{rotation} (n_dims x n_dims), \code{eigenvalues}
//'   (length n_dims, = sdev^2), and \code{column_names} ("SVD1", "SVD2", ...)
//' @export
// [[Rcpp::export]]
List ena_svd(arma::mat points) {
    if (!points.is_finite())
        stop("ena_svd: input matrix must not contain NaN or Inf - "
             "filter or impute rows with non-finite values before calling");
    return pack_rotation_result(qe::ena_svd(points));
}

//' Project a matrix onto the hyperplane orthogonal to a unit-norm axis
//'
//' Computes \code{data - (data \%*\% axis) \%*\% t(axis)}. The caller is
//' responsible for ensuring \code{axis} is unit-norm.
//'
//' @param data Numeric matrix (n_units x n_dims)
//' @param axis Numeric vector of length n_dims, unit-norm
//' @return Numeric matrix of the same shape as \code{data}
//' @export
// [[Rcpp::export]]
arma::mat deflate(arma::mat data, arma::vec axis) {
    return qe::deflate(data, axis);
}

//' Orthogonal SVD — orthonormalize named axes via QR, fill the rest from SVD
//'
//' Mirrors rENA's \code{orthogonal_svd()} in \code{ena.rotate.by.mean.R}:
//' the named axes in the output are the orthonormalized Q columns, not the
//' original \code{weights} columns. Use \code{complete_rotation} to keep
//' the named axes verbatim.
//'
//' @param data         Numeric matrix (n_units x n_dims)
//' @param weights      Numeric matrix (n_dims x k); columns are the named axes
//' @param named_labels Character vector of length k
//' @return List with \code{rotation}, \code{eigenvalues}, \code{column_names}
//' @export
// [[Rcpp::export]]
List orthogonal_svd(arma::mat data,
                        arma::mat weights,
                        std::vector<std::string> named_labels) {
    return pack_rotation_result(qe::orthogonal_svd(data, weights, named_labels));
}

//' Complete a rotation — keep named axes verbatim, fill remainder from SVD
//'
//' Mirrors the tail of \code{ena.rotate.by.generalized}: the named axes
//' appear in the output exactly as provided, and the trailing columns come
//' from an SVD of the data deflated by all named axes.
//'
//' @param data         Numeric matrix (n_units x n_dims)
//' @param named_axes   Numeric matrix (n_dims x k); columns must be unit-norm
//' @param named_labels Character vector of length k
//' @return List with \code{rotation}, \code{eigenvalues}, \code{column_names}
//' @export
// [[Rcpp::export]]
List complete_rotation(arma::mat data,
                           arma::mat named_axes,
                           std::vector<std::string> named_labels) {
    return pack_rotation_result(qe::complete_rotation(data, named_axes, named_labels));
}

//' Means rotation
//'
//' For each group pair, computes a normalized mean-difference axis on the
//' progressively-deflated data and finishes with \code{orthogonal_svd}.
//' The input is column-centered first, matching rENA's
//' \code{scale(data, scale=F, center=T)} at the top of \code{ena.rotate.by.mean}.
//'
//' Each element of \code{group_pairs} is a length-2 list \code{list(a, b)}
//' of 0-based row indices into \code{points}.
//'
//' @param points      Numeric matrix (n_units x n_dims)
//' @param group_pairs List of length k; each element is \code{list(a, b)}
//'   where \code{a} and \code{b} are 0-based integer index vectors
//' @return List with \code{rotation}, \code{eigenvalues}, \code{column_names}
//' @export
// [[Rcpp::export]]
List means_rotation(arma::mat points, List group_pairs) {
    std::vector<qe::GroupPair> pairs;
    pairs.reserve(group_pairs.size());
    for (R_xlen_t i = 0; i < group_pairs.size(); ++i) {
        List pair = group_pairs[i];
        if (pair.size() != 2) {
            stop("group_pairs[[%d]] must be a length-2 list(a, b)",
                 static_cast<int>(i + 1));
        }
        IntegerVector ra = pair[0];
        IntegerVector rb = pair[1];
        arma::uvec a(ra.size());
        arma::uvec b(rb.size());
        for (R_xlen_t j = 0; j < ra.size(); ++j) {
            a(j) = static_cast<arma::uword>(ra[j]);
        }
        for (R_xlen_t j = 0; j < rb.size(); ++j) {
            b(j) = static_cast<arma::uword>(rb[j]);
        }
        pairs.push_back({a, b});
    }
    return pack_rotation_result(qe::means_rotation(points, pairs));
}

// =============================================================================
// Generalized Means Rotation (GMR)
// =============================================================================

//' Generalized Means Rotation
//'
//' Computes a rotation axis that best represents a target variable's
//' contribution to the ENA point space, after controlling for covariates via
//' Lasso (coordinate-descent, k-fold CV).  Mirrors
//' \code{rENA::ena.rotate.by.generalized()}.
//'
//' The caller is responsible for building \code{x_model_matrix} (e.g. via
//' \code{model.matrix()}) and identifying \code{x1_cols} (0-based indices of
//' the target variable's columns, which receive \code{penalty_factor = 0}).
//' Categorical targets should be encoded as 0-based integers.
//'
//' @param V              Numeric matrix (n_units x n_connections) — ENA points.
//' @param x_model_matrix Numeric matrix (n_units x p) — model matrix for x axis.
//' @param x_target       Numeric vector length n — target variable (raw float
//'   for numeric; 0-based integer codes for categorical).
//' @param x1_cols        Integer vector of 0-based column indices in
//'   \code{x_model_matrix} that belong to the target variable (unpenalized).
//' @param x_categorical  Logical — TRUE if target is categorical.
//' @param x_n_groups     Integer — number of distinct groups (categorical only).
//' @param x_subset       Integer vector of 0-based row indices to subset for
//'   the x-axis GMR step (pass \code{integer(0)} to use all rows).
//' @param has_y          Logical — TRUE to compute a second GMR axis.
//' @param y_model_matrix Numeric matrix (n_units x p) — model matrix for y axis
//'   (ignored when \code{has_y = FALSE}).
//' @param y_target       Numeric vector — y-axis target (ignored when
//'   \code{has_y = FALSE}).
//' @param y1_cols        0-based column indices for y target (ignored when
//'   \code{has_y = FALSE}).
//' @param y_categorical  Logical (ignored when \code{has_y = FALSE}).
//' @param y_n_groups     Integer (ignored when \code{has_y = FALSE}).
//' @param n_lambda       Length of the Lasso lambda path (default 50).
//' @param k_folds        Cross-validation folds for lambda selection (default 5).
//' @param lasso_eps      \code{lambda_min = lasso_eps * lambda_max} (default 0.01).
//' @return List with \code{rotation}, \code{eigenvalues}, \code{column_names}.
//' @export
// [[Rcpp::export]]
List generalized_means_rotation(
    arma::mat V,
    arma::mat x_model_matrix,
    arma::vec x_target,
    IntegerVector x1_cols,
    bool x_categorical,
    int  x_n_groups,
    IntegerVector x_subset,
    bool has_y,
    arma::mat y_model_matrix,
    arma::vec y_target,
    IntegerVector y1_cols,
    bool y_categorical,
    int  y_n_groups,
    int  n_lambda  = 50,
    int  k_folds   = 5,
    double lasso_eps = 0.01
) {
    // ── Unpack x1_cols ───────────────────────────────────────────────────────
    arma::uvec x1(x1_cols.size());
    for (R_xlen_t i = 0; i < x1_cols.size(); ++i)
        x1(i) = static_cast<arma::uword>(x1_cols[i]);

    // ── Unpack x_subset (empty IntegerVector → use all rows) ─────────────────
    arma::uvec x_sub;
    if (x_subset.size() > 0) {
        x_sub.set_size(x_subset.size());
        for (R_xlen_t i = 0; i < x_subset.size(); ++i)
            x_sub(i) = static_cast<arma::uword>(x_subset[i]);
    }

    // ── Unpack y1_cols ───────────────────────────────────────────────────────
    arma::uvec y1(y1_cols.size());
    for (R_xlen_t i = 0; i < y1_cols.size(); ++i)
        y1(i) = static_cast<arma::uword>(y1_cols[i]);

    // ── Build params ─────────────────────────────────────────────────────────
    qe::GeneralizedRotationParams p;
    p.x_model_matrix = x_model_matrix;
    p.x_target       = x_target;
    p.x1_cols        = x1;
    p.x_categorical  = x_categorical;
    p.x_n_groups     = static_cast<arma::uword>(x_n_groups);
    p.x_subset       = x_sub;
    p.has_y          = has_y;
    p.y_model_matrix = y_model_matrix;
    p.y_target       = y_target;
    p.y1_cols        = y1;
    p.y_categorical  = y_categorical;
    p.y_n_groups     = static_cast<arma::uword>(y_n_groups);
    p.n_lambda       = n_lambda;
    p.k_folds        = k_folds;
    p.lasso_eps      = lasso_eps;

    return pack_rotation_result(qe::generalized_means_rotation(V, p));
}

1		#' @useDynLib libqe, .registration = TRUE
2		#' @importFrom Rcpp evalCpp
3		NULL
4
5		.onLoad <- function(libname, pkgname) {
6		# Nothing to initialise — libqe is a header + Rcpp library.
7	!	invisible(NULL)
8		}

1		/** @file accumulation.hpp
2		* @brief Accumulation primitives shared between the rENA stanza-window model
3		* and the tma ground/response/tensor model.
4		*
5		* Both models reduce to the same core connection_matrix() operation; they
6		* differ in how ground and response vectors are assembled from the raw data.
7		*/
8		#ifndef LIBQE_ACCUMULATION_HPP
9		#define LIBQE_ACCUMULATION_HPP
10
11		#include <armadillo>
12		#include <functional>
13		#include <vector>
14		#include "adjacency.hpp"
15
16		namespace qe {
17
18		/// @name Core adjacency math
19		/// @{
20
21		/** @brief Compute the connection matrix for one ground + response event pair.
22		*
23		* @param[in] ground Row vector of accumulated ground codes (length p).
24		* @param[in] response Row vector of the focal (response) codes (length p).
25		* @param[in] response_weight Scalar weight applied to the response self-connection
26		* term. Defaults to 1.0.
27		* @param[in] ordered When @c true, produces a directed (asymmetric) matrix:
28		* @c ground→response cross-product plus
29		* @c 0.5 * response_weight * response self-connection
30		* (diagonal zeroed).
31		* When @c false, produces a symmetric undirected matrix:
32		* @c (response_weight * r⊗r) + g⊗r + r⊗g.
33		*
34		* @returns A p × p connection matrix.
35		*
36		* @note Equivalent to @c calculate_adjacency_matrix() in @c tma/code.cpp.
37		*/
38	24x	inline arma::mat connection_matrix(
39		arma::rowvec ground, arma::rowvec response,
40		double response_weight = 1.0, bool ordered = true
41		) {
42	24x	arma::mat resp_self = response.t() * response;
43	24x	arma::mat g_by_r = ground.t() * response;
44
45	24x	if (ordered) {
46	19x	resp_self.diag().zeros();
47	76x	return g_by_r + (0.5 * response_weight * resp_self);
48		}
49	10x	return (response_weight * resp_self) + g_by_r + (response.t() * ground);
50		}
51
52		/// @}
53
54		/// @name Traditional stanza-window accumulation (rENA model)
55		/// @{
56
57		/** @brief Accumulate connection vectors for every row in one conversation using
58		* a stanza window.
59		*
60		* For each focal row @c k in @p codes the function assembles a ground context
61		* from the surrounding window and calls connection_matrix().
62		*
63		* @par Ordering semantics
64		* - @b Undirected (@p ordered = @c false, default — rENA stanza model): the
65		* window spans [@c k - window_back, @c k + window_forward]; the upper-triangle
66		* outer-product (code_connections) is computed and back/forward-reference
67		* corrections are subtracted. Returns a matrix of shape
68		* @c n_rows × choose_two(n_codes).
69		* - @b Directed (@p ordered = @c true): focal row @c k is the response; the
70		* sum of prior rows [@c earliest, @c k−1] is the ground.
71		* connection_matrix() is called with @p ordered = @c true and the result is
72		* vectorised. @p window_forward is ignored (future rows cannot be causal
73		* ground context). Returns a matrix of shape @c n_rows × n_codes².
74		*
75		* @param[in] codes Code matrix for one conversation (n_rows × n_codes).
76		* @param[in] window_back Number of prior rows included in the window
77		* (1 = current row only; INT_MAX = all prior rows).
78		* @param[in] window_forward Number of future rows included in the window
79		* (ignored when @p ordered is @c true).
80		* @param[in] binary When @c true, clamp all positive entries to 1.
81		* @param[in] ordered When @c true, use directed accumulation; when
82		* @c false, use undirected stanza-window accumulation.
83		*
84		* @returns Connection matrix (n_rows × connection_vector_length) where
85		* connection_vector_length is choose_two(n_codes) for undirected or
86		* n_codes² for directed.
87		*
88		* @note Equivalent to @c ref_window_df() in @c rENA/ena.cpp (undirected case).
89		*/
90	10x	inline arma::mat accumulate_stanza(
91		arma::mat codes,
92		int window_back = 1,
93		int window_forward = 0,
94		bool binary = true,
95		bool ordered = false
96		) {
97	10x	int n_rows = codes.n_rows;
98	10x	int n_codes = codes.n_cols;
99	10x	const int INT_MAX_VAL = std::numeric_limits<int>::max();
100
101		// Shared helper: earliest row index for focal row k
102	33x	auto get_earliest = [&](int row) -> int {
103	33x	if (window_back == INT_MAX_VAL \|\| window_back == 0)
104	!	return (window_back == 0) ? row : 0;
105	33x	return std::max(0, row - (window_back - 1));
106		};
107
108	10x	if (ordered) {
109	5x	arma::mat out(n_rows, n_codes * n_codes, arma::fill::zeros);
110	20x	for (int row = 0; row < n_rows; row++) {
111	15x	int earliest = get_earliest(row);
112	30x	arma::rowvec response = codes.row(row);
113	15x	arma::rowvec ground(n_codes, arma::fill::zeros);
114	15x	if (row > earliest)
115	20x	ground = arma::sum(codes.rows(earliest, row - 1));
116	60x	out.row(row) = arma::vectorise(
117	45x	connection_matrix(ground, response, 1.0, true)).t();
118		}
119	8x	if (binary) out.elem(arma::find(out > 0)).ones();
120	5x	return out;
121		}
122
123		// Undirected: existing rENA stanza-window logic
124	5x	int n_tri = choose_two(n_codes);
125	5x	arma::mat out(n_rows, n_tri, arma::fill::zeros);
126
127	23x	for (int row = 0; row < n_rows; row++) {
128	18x	int earliest = get_earliest(row);
129
130	18x	int latest = row;
131	18x	if (window_forward == INT_MAX_VAL \|\| row + window_forward >= n_rows) {
132	!	latest = n_rows - 1;
133	18x	} else if (window_forward > 0) {
134	!	latest = std::min(n_rows - 1, row + window_forward);
135		}
136
137	36x	arma::mat window_rows = codes.rows(earliest, latest);
138	18x	arma::mat summed = arma::sum(window_rows);
139	18x	arma::rowvec to_ut = code_connections(summed);
140
141		// Back-reference correction
142	18x	int win_rows = latest - earliest + 1;
143	18x	if (win_rows > 0 && window_back > 1 && row - 1 >= 0) {
144	9x	int head_rows = win_rows - 1 - window_forward;
145	9x	if (head_rows > 0) {
146	9x	arma::mat refs = window_rows.head_rows(head_rows);
147	9x	arma::mat ref_sum = arma::sum(refs);
148	9x	to_ut -= code_connections(ref_sum);
149		}
150		}
151
152		// Forward-reference correction
153		if (window_forward > 0 && latest <= n_rows - 1) {
154	!	int tail_rows = latest - row;
155	!	if (tail_rows > 0) {
156	!	arma::mat refs = window_rows.tail_rows(tail_rows);
157	!	arma::mat ref_sum = arma::sum(refs);
158	!	to_ut -= code_connections(ref_sum);
159		}
160		}
161
162	36x	out.row(row) = to_ut;
163		}
164
165	8x	if (binary) out.elem(arma::find(out > 0)).ones();
166	5x	return out;
167		}
168
169		/// @}
170
171		/// @name Ground/response accumulation (tma model)
172		/// @{
173
174		/** @brief Accumulate connections for a single unit from its context rows.
175		*
176		* For every response row belonging to this unit, the function computes a
177		* weighted ground vector from all preceding context rows and calls
178		* connection_matrix().
179		*
180		* @par Ordering semantics
181		* When @p ordered is @c false (default), the result is folded into the
182		* upper-triangle representation (length choose_two(p)). When @p ordered is
183		* @c true, the full directed p² vector is returned.
184		*
185		* @param[in] codes Full context matrix visible to this unit (n × p).
186		* @param[in] unit_rows 0-based indices of the rows that belong to this unit.
187		* @param[in] decay_fn Callback with signature
188		* @c arma::vec(arma::vec distances) that maps a vector
189		* of distances (0 = current row, 1 = one step back, …)
190		* to a vector of scalar weights; default behaviour is a
191		* simple rectangular window.
192		* @param[in] ordered When @c true, return a directed flat vector (length p²);
193		* when @c false, return an undirected upper-triangle flat
194		* vector (length choose_two(p)).
195		*
196		* @returns Flat connection vector of length choose_two(p) (undirected) or p²
197		* (directed).
198		*
199		* @note Equivalent to @c accumulate_network() in @c tma/code.cpp, with the R
200		* function callback replaced by @c std::function.
201		*/
202	3x	inline arma::rowvec accumulate_unit(
203		const arma::mat& codes,
204		const std::vector<int>& unit_rows,
205		std::function<arma::vec(arma::vec)> decay_fn,
206		bool ordered = false
207		) {
208	3x	int code_cnt = codes.n_cols;
209	3x	arma::mat g_w_vec(code_cnt, code_cnt, arma::fill::zeros);
210
211	11x	for (int unit_row : unit_rows) {
212	8x	if (unit_row == 0) continue;
213
214	14x	arma::rowvec response = codes.row(unit_row);
215
216		// Ground rows: everything from 0 up to (and including) unit_row
217	7x	arma::uvec ground_indices = arma::regspace<arma::uvec>(0, 1, unit_row);
218	7x	arma::vec distances(ground_indices.n_elem);
219	32x	for (arma::uword k = 0; k < ground_indices.n_elem; k++)
220	75x	distances[k] = static_cast<double>(unit_row - ground_indices[k]);
221
222	7x	arma::vec decay_weights = decay_fn(distances);
223
224	7x	arma::mat ground_codes = codes.rows(ground_indices);
225	7x	arma::mat weighted = ground_codes.each_col() % decay_weights;
226	7x	arma::rowvec g_summed = arma::sum(weighted);
227
228		// Exclude the response row's own contribution from the ground
229	21x	arma::rowvec g_no_resp = g_summed - weighted.tail_rows(1).row(0);
230
231	7x	g_w_vec += connection_matrix(g_no_resp, response, 1.0, ordered);
232		}
233
234	3x	if (!ordered) return fold_directed_network(arma::vectorise(g_w_vec));
235	2x	return arma::vectorise(g_w_vec).t();
236		}
237
238		/// @}
239
240		/// @name Extended ground/response accumulation — returns per-row connection data
241		/// @{
242
243		/** @brief Result of accumulate_unit_with_rows().
244		*
245		* Bundles the unit-level flat connection vector together with the per-response-row
246		* full p² connection matrices needed by tma's @c accumulate_network().
247		*/
248		struct UnitNetworks {
249		arma::rowvec networks; ///< Flat connection vector (choose_two(p) or p²).
250		arma::mat row_networks; ///< Per-response-row full p² matrix (n_unit_rows × p²).
251		};
252
253		/** @brief Like accumulate_unit() but also returns the per-response-row connection
254		* matrix needed by tma's @c accumulate_network().
255		*
256		* @param[in] codes Full context matrix visible to this unit (n × p).
257		* @param[in] unit_rows 0-based indices of the rows that belong to this unit.
258		* @param[in] decay_fn Callback with signature
259		* @c arma::vec(int unit_row, arma::uvec ground_indices)
260		* that returns a weight vector of length
261		* @c ground_indices.n_elem. The two-argument form lets
262		* callers (e.g. the tma Rcpp wrapper) set R environment
263		* variables before invoking the actual R decay function,
264		* without any R-specific code leaking into libqe.
265		* @param[in] ordered When @c true, return directed flat vectors (length p²);
266		* when @c false, return undirected upper-triangle flat
267		* vectors (length choose_two(p)).
268		*
269		* @returns A UnitNetworks struct containing the aggregated connection vector
270		* and the per-response-row connection matrices.
271		*
272		* @note Equivalent to @c accumulate_network() in @c tma/code.cpp (extended form).
273		*/
274	!	inline UnitNetworks accumulate_unit_with_rows(
275		const arma::mat& codes,
276		const std::vector<int>& unit_rows,
277		std::function<arma::vec(int, arma::uvec)> decay_fn,
278		bool ordered = false
279		) {
280	!	int code_cnt = codes.n_cols;
281	!	int n_unit_rows = unit_rows.size();
282
283	!	arma::mat g_w_mat(code_cnt, code_cnt, arma::fill::zeros);
284	!	arma::mat row_networks(n_unit_rows, code_cnt * code_cnt, arma::fill::zeros);
285
286	!	for (int i = 0; i < n_unit_rows; ++i) {
287	!	int unit_row = unit_rows[i];
288
289	!	arma::uvec ground_indices = arma::regspace<arma::uvec>(0, 1, unit_row);
290	!	arma::vec decay_weights = decay_fn(unit_row, ground_indices);
291
292	!	arma::mat ground_codes = codes.rows(ground_indices);
293	!	arma::mat weighted = ground_codes.each_col() % decay_weights;
294	!	arma::rowvec g_summed = arma::sum(weighted);
295	!	arma::rowvec g_no_resp = g_summed - weighted.tail_rows(1).row(0);
296
297	!	arma::rowvec response = codes.row(unit_row);
298	!	arma::mat conn = connection_matrix(g_no_resp, response, 1.0, ordered);
299	!	g_w_mat += conn;
300	!	row_networks.row(i) = arma::vectorise(conn).t();
301		}
302
303	!	UnitNetworks result;
304	!	if (!ordered)
305	!	result.networks = fold_directed_network(arma::vectorise(g_w_mat));
306		else
307	!	result.networks = arma::vectorise(g_w_mat).t();
308	!	result.row_networks = row_networks;
309	!	return result;
310		}
311
312		/// @}
313
314		/// @name Tensor-based multi-modal accumulation (tma model)
315		/// @{
316
317		/** @brief Compute the linear index into a column-major multi-dimensional array.
318		*
319		* @param[in] indices Per-dimension index values (0-based).
320		* @param[in] dims Size of each dimension.
321		*
322		* @returns The scalar column-major flat index.
323		*
324		* @note Equivalent to @c flat_index() in @c tma/code.cpp.
325		*/
326	!	inline int flat_index(const std::vector<int>& indices,
327		const std::vector<int>& dims) {
328	!	if (indices.size() != dims.size())
329	!	throw std::invalid_argument("Number of indices must match number of dimensions.");
330	!	size_t linear = 0, stride = 1;
331	!	for (size_t v = 0; v < indices.size(); ++v) {
332	!	linear += static_cast<size_t>(indices[v]) * stride;
333	!	stride *= static_cast<size_t>(dims[v]);
334		}
335	!	return static_cast<int>(linear);
336		}
337
338		/** @brief Result of apply_tensor_unit().
339		*
340		* Bundles the unit-level aggregated connection vector together with the
341		* per-response-row connection matrices used by the tma tensor accumulation.
342		*/
343		struct TensorNetworks {
344		arma::rowvec connection_counts; ///< Unit-level flat vector (p²).
345		arma::mat row_connection_counts; ///< Per-response-row connection matrix (n_unit_rows × p²).
346		};
347
348		/** @brief Pure-C++ port of the tma @c apply_tensor() inner logic.
349		*
350		* For each response row in @p unit_rows, the function looks up window and
351		* weight values from the tensor, collects all ground rows that fall within the
352		* response row's window, and calls connection_matrix() to accumulate the
353		* connection counts.
354		*
355		* @param[in] tensor Flat column-major array containing window and
356		* weight values indexed by the factor dimensions.
357		* @param[in] dims Sizes of each dimension of @p tensor.
358		* @param[in] dims_sender Tensor axis indices corresponding to sender factors.
359		* @param[in] dims_receiver Tensor axis indices corresponding to receiver
360		* factors; these axes are overridden to the response
361		* row's values when looking up window sizes for
362		* ground rows.
363		* @param[in] dims_mode Tensor axis indices corresponding to mode factors.
364		* @param[in] context_lookup Integer matrix (n_context_rows × n_factors, 0-based)
365		* mapping each context row to its factor level indices.
366		* @param[in] unit_rows 0-based response-row indices for this unit.
367		* @param[in] codes Full context code matrix (n_context_rows × n_codes).
368		* @param[in] times Timestamp associated with each context row
369		* (length n_context_rows).
370		* @param[in] ordered When @c true, produce a directed full p² result;
371		* when @c false, produce an undirected upper-triangle
372		* result.
373		*
374		* @returns A TensorNetworks struct containing the unit-level connection counts
375		* and the per-response-row connection counts.
376		*
377		* @note Equivalent to the inner loop of @c apply_tensor() in @c tma/code.cpp.
378		*/
379	!	inline TensorNetworks apply_tensor_unit(
380		const arma::vec& tensor,
381		const std::vector<int>& dims,
382		const std::vector<int>& dims_sender,
383		const std::vector<int>& dims_receiver,
384		const std::vector<int>& dims_mode,
385		const arma::imat& context_lookup,
386		const std::vector<int>& unit_rows,
387		const arma::mat& codes,
388		const arma::vec& times,
389		bool ordered = true
390		) {
391	!	const int WINDOW_DIM = 1;
392	!	const int WEIGHT_DIM = 0;
393	!	const bool IS_DEFAULT = (dims.size() == 1 && dims[0] == 2);
394
395	!	int code_cnt = codes.n_cols;
396	!	int n_unit_rows = static_cast<int>(unit_rows.size());
397	!	int ctx_cols = static_cast<int>(context_lookup.n_cols);
398
399	!	arma::mat g_w_mat(code_cnt, code_cnt, arma::fill::zeros);
400	!	arma::mat row_conn(n_unit_rows, code_cnt * code_cnt, arma::fill::zeros);
401
402	!	int response_win = 0;
403	!	int response_weight = 0;
404	!	if (IS_DEFAULT) {
405	!	response_win = static_cast<int>(tensor[1]);
406	!	response_weight = static_cast<int>(tensor[0]);
407		}
408
409	!	for (int i = 0; i < n_unit_rows; ++i) {
410	!	int ri = unit_rows[i];
411	!	double response_time = times[ri];
412
413	!	std::vector<int> resp_ctx(ctx_cols + 1);
414	!	for (int j = 0; j < ctx_cols; ++j) resp_ctx[j] = context_lookup(ri, j);
415	!	resp_ctx[ctx_cols] = WINDOW_DIM;
416
417	!	if (!IS_DEFAULT)
418	!	response_win = static_cast<int>(tensor[flat_index(resp_ctx, dims)]);
419
420	!	std::vector<int> gri_v;
421	!	std::vector<double> grw_v;
422	!	arma::rowvec g_ws(code_cnt, arma::fill::zeros);
423
424	!	if (ri > 0) {
425	!	for (int gr = ri - 1; gr >= 0; --gr) {
426	!	std::vector<int> row_v(ctx_cols + 1);
427	!	for (int j = 0; j < ctx_cols; ++j) row_v[j] = context_lookup(gr, j);
428	!	row_v[ctx_cols] = WINDOW_DIM;
429	!	for (int dim : dims_receiver) row_v[dim] = resp_ctx[dim];
430
431	!	double row_win = static_cast<double>(response_win);
432	!	if (!IS_DEFAULT)
433	!	row_win = tensor[flat_index(row_v, dims)];
434
435	!	if (times[gr] + row_win > response_time) {
436	!	gri_v.push_back(gr);
437	!	row_v[ctx_cols] = WEIGHT_DIM;
438	!	double row_wgt = static_cast<double>(response_weight);
439	!	if (!IS_DEFAULT)
440	!	row_wgt = tensor[flat_index(row_v, dims)];
441	!	grw_v.push_back(row_wgt);
442		}
443		}
444
445	!	if (!gri_v.empty()) {
446	!	arma::uvec gri_u(gri_v.size());
447	!	for (size_t k = 0; k < gri_v.size(); ++k) gri_u[k] = gri_v[k];
448	!	arma::mat gc = codes.rows(gri_u);
449	!	arma::colvec wts = arma::conv_to<arma::colvec>::from(grw_v);
450	!	g_ws = arma::sum(gc.each_col() % wts, 0);
451		}
452		}
453
454	!	if (!IS_DEFAULT) {
455	!	resp_ctx[ctx_cols] = WEIGHT_DIM;
456	!	response_weight = static_cast<int>(tensor[flat_index(resp_ctx, dims)]);
457		}
458
459	!	arma::rowvec row_vec = codes.row(ri);
460		arma::mat resp = connection_matrix(
461	!	g_ws, row_vec, static_cast<double>(response_weight), ordered);
462	!	g_w_mat += resp;
463	!	row_conn.row(i) = arma::vectorise(resp).t();
464		}
465
466	!	TensorNetworks result;
467	!	result.connection_counts = arma::vectorise(g_w_mat).t();
468	!	result.row_connection_counts = row_conn;
469	!	return result;
470		}
471
472		/// @}
473
474		/// @name Per-row co-occurrence and rolling window (rENA accumulation primitives)
475		/// @{
476
477		/** @brief Compute the upper-triangle co-occurrence vector for each row.
478		*
479		* For each row, calls code_connections() and optionally binarizes the result.
480		*
481		* @param[in] codes Code matrix (n_rows × n_codes).
482		* @param[in] binary When @c true, clamp all positive entries to 1.
483		*
484		* @returns Matrix of shape n_rows × choose_two(n_codes).
485		*
486		* @note Equivalent to @c rows_to_co_occurrences() in @c rENA/ena.cpp.
487		*/
488	5x	inline arma::mat row_connections(
489		const arma::mat& codes,
490		bool binary = true
491		) {
492	5x	int n_rows = codes.n_rows;
493	5x	int n_tri = choose_two(codes.n_cols);
494	5x	arma::mat out(n_rows, n_tri, arma::fill::zeros);
495	16x	for (int row = 0; row < n_rows; ++row)
496	33x	out.row(row) = code_connections(codes.row(row));
497	8x	if (binary) out.elem(arma::find(out > 0)).ones();
498	5x	return out;
499		}
500
501		/** @brief Rolling backward window sum of the raw code matrix.
502		*
503		* For each focal row @c k, sums rows [@c max(0, k - window_size + 1), @c k].
504		* Returns a matrix of the same shape as @p codes — no upper-triangle transform
505		* is applied. A @p window_size of 0 or less is treated as 1 (current row only).
506		*
507		* @param[in] codes Code matrix (n_rows × n_codes).
508		* @param[in] window_size Number of rows to include in the backward window
509		* (including the focal row). Values <= 0 are clamped
510		* to 1.
511		*
512		* @returns Matrix of the same shape as @p codes containing the windowed sums.
513		*
514		* @note Equivalent to @c ref_window_lag() in @c rENA/ena.cpp.
515		*/
516	4x	inline arma::mat rolling_window_sum(
517		const arma::mat& codes,
518		int window_size = 1
519		) {
520	4x	int n_rows = codes.n_rows;
521	4x	arma::mat out(n_rows, codes.n_cols, arma::fill::zeros);
522	16x	for (int row = 0; row < n_rows; ++row) {
523		int earliest = (window_size > 0)
524		? std::max(0, row - (window_size - 1))
525	!	: row; // guard: window_size <= 0 -> single row
526	36x	out.row(row) = arma::sum(codes.rows(earliest, row));
527		}
528	4x	return out;
529		}
530
531		/// @}
532
533		} // namespace qe
534
535		#endif // LIBQE_ACCUMULATION_HPP

1		/**
2		* @file adjacency.hpp
3		* @brief Combinatorics and vector/matrix utilities for ENA connection networks.
4		*
5		* Pure C++ / Armadillo — no Rcpp dependency.
6		* Include @c <RcppArmadillo.h> before this header when building inside an R package.
7		*/
8		#ifndef LIBQE_ADJACENCY_HPP
9		#define LIBQE_ADJACENCY_HPP
10
11		#include <armadillo>
12		#include <string>
13		#include <vector>
14		#include <cmath>
15
16		namespace qe {
17
18		// ---------------------------------------------------------------------------
19		/// @name Combinatorics
20		/// @{
21		// ---------------------------------------------------------------------------
22
23		/**
24		* @brief Number of unordered pairs from @p n items (n choose 2).
25		*
26		* @param n Number of codes/nodes.
27		* @returns @c (n * (n - 1)) / 2.
28		*/
29	67x	inline int choose_two(int n) {
30	67x	return (n * (n - 1)) / 2;
31		}
32
33		/**
34		* @brief Upper-triangle index pairs for a symmetric matrix of side @p len.
35		*
36		* @param len Side length of the square matrix (number of codes).
37		* @param row Controls what is returned:
38		* - @c -1 (default): 2 × k matrix of [row_idx; col_idx].
39		* - @c 0: row indices only (1 × k).
40		* - @c 1: column indices only (1 × k).
41		* @returns An @c arma::umat of shape 2×k or 1×k depending on @p row.
42		* @note Equivalent to @c triIndices() in rENA/ena.cpp and tma/code.cpp.
43		*/
44	9x	inline arma::umat connection_indices(int len, int row = -1) {
45	9x	int vS = choose_two(len);
46	9x	int s = 0;
47
48	9x	arma::umat vR(2, vS, arma::fill::zeros);
49	9x	arma::umat vRone(1, vS, arma::fill::zeros);
50
51	45x	for (int i = 2; i <= len; i++) {
52	145x	for (int j = 0; j < i - 1; j++) {
53	109x	vR(0, s) = j;
54	109x	vR(1, s) = i - 1;
55	115x	if (row == 0) vRone[s] = j;
56	109x	else if (row == 1) vRone[s] = i - 1;
57	109x	s++;
58		}
59		}
60	18x	return (row == -1) ? vR : vRone;
61		}
62
63		/// @}
64
65		// ---------------------------------------------------------------------------
66		/// @name Vector ↔ upper-triangle conversions
67		/// @{
68		// ---------------------------------------------------------------------------
69
70		/**
71		* @brief Compute pairwise products and return as a flat upper-triangle vector.
72		*
73		* For each pair @c (j, i) with @c j < i, outputs @c v[j] * v[i].
74		* The result has length @c choose_two(v.size()).
75		*
76		* @param v A 1 × p row vector (or p-element matrix).
77		* @returns A row vector of length @c choose_two(p).
78		* @note Equivalent to @c vector_to_ut() in rENA/ena.cpp.
79		*/
80	40x	inline arma::rowvec code_connections(arma::mat v) {
81	40x	int vL = v.size();
82	40x	int vS = choose_two(vL);
83	40x	arma::rowvec out(vS, arma::fill::zeros);
84	40x	int s = 0;
85	130x	for (int i = 2; i <= vL; i++) {
86	240x	for (int j = 0; j < i - 1; j++) {
87	450x	out[s] = v[j] * v[i - 1];
88	150x	s++;
89		}
90		}
91	40x	return out;
92		}
93
94		/**
95		* @brief Fold a directed n² vector into an undirected upper-triangle vector.
96		*
97		* Symmetric elements are summed: A→B + B→A for every pair.
98		*
99		* @param v Flat directed vector of length n², column-major (n = sqrt(v.size())).
100		* @returns Upper-triangle row vector of length @c choose_two(n).
101		* @note Equivalent to @c vector_to_summed_uppertri() in tma/code.cpp.
102		*/
103	6x	inline arma::rowvec fold_directed_network(arma::vec v) {
104	6x	int n = static_cast<int>(std::round(std::sqrt(static_cast<double>(v.size()))));
105	6x	int tris = choose_two(n);
106
107	6x	arma::mat m(v);
108	6x	m.reshape(n, n);
109	12x	m = m + m.t();
110
111	6x	arma::colvec flat = m.as_col();
112	6x	arma::uvec up_inds = arma::trimatu_ind(arma::size(m), 1);
113	12x	return arma::conv_to<arma::rowvec>::from(flat.elem(up_inds));
114		}
115
116		/**
117		* @brief Flatten an adjacency matrix to a vector.
118		*
119		* @param x Square adjacency matrix (n × n).
120		* @param full If @c true (default), returns the full n² vector (directed).
121		* If @c false, returns the upper-triangle only (undirected).
122		* @returns Row vector of length n² or @c choose_two(n).
123		* @note Equivalent to @c adjacency_matrix_to_vector() in tma/code.cpp.
124		*/
125	2x	inline arma::rowvec network_to_vector(arma::mat x, bool full = true) {
126	3x	if (full) return arma::vectorise(x).t();
127	3x	arma::mat combined = arma::trimatu(x, 1) + arma::trimatl(x, -1);
128	1x	arma::uvec up_inds = arma::trimatu_ind(arma::size(combined), 1);
129	2x	return arma::vectorise(combined(up_inds)).t();
130		}
131
132		/// @}
133
134		// ---------------------------------------------------------------------------
135		/// @name String utilities
136		/// @{
137		// ---------------------------------------------------------------------------
138
139		/**
140		* @brief Generate "A & B" pair names for every upper-triangle position.
141		*
142		* For code names @c {"A", "B", "C"} returns @c {"A & B", "A & C", "B & C"}.
143		*
144		* @param v Ordered list of code names (length p).
145		* @returns Vector of @c choose_two(p) label strings.
146		* @note Equivalent to @c svector_to_ut() in rENA/ena.cpp.
147		*/
148	2x	inline std::vector<std::string> connection_names(std::vector<std::string> v) {
149	2x	int vL = v.size();
150	2x	int vS = choose_two(vL);
151	2x	std::vector<std::string> out(vS);
152	2x	int s = 0;
153	7x	for (int i = 2; i <= vL; i++) {
154	14x	for (int j = 0; j < i - 1; j++) {
155	9x	out[s] = v[j] + " & " + v[i - 1];
156	9x	s++;
157		}
158		}
159	2x	return out;
160		}
161
162		/// @}
163
164		} // namespace qe
165
166		#endif // LIBQE_ADJACENCY_HPP

1		/**
2		* @file generalized_rotation.hpp
3		* @brief Generalized Means Rotation (GMR) with Lasso-based covariate adjustment.
4		*
5		* @details
6		* Mirrors rENA's `ena.rotate.by.generalized()` + `gmr()` + `get_x1_main_effect()`.
7		* Authoritative reference: commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901
8		* ("Fixed bugs in rotation functions") by Zhiqiang Cai.
9		*
10		* Algorithm overview
11		*
12		* 1. Compute x_vector: the rotation axis that best represents the target
13		* variable's contribution to the ENA point space, after controlling for
14		* covariates via Lasso (`lasso_x1_contribution`).
15		* 2. Deflate V by x_vector → defA = V - Vxx'.
16		* 3. Optionally compute secondary axis x1: the normalised group-mean
17		* difference in the original V space (not defA) for groups 0 vs 1.
18		* x1 stays empty when the target is not categorical or there are not
19		* exactly 2 groups — there is no SVD fallback.
20		* 4. Compute y_vector from a GMR call on defA, or from SVD of defA.
21		* Explicitly orthogonalise y_vector against x_vector, and against x1
22		* when x1 is available.
23		* 5. Double-deflate V by x_vector and y_vector, then call `complete_rotation()`
24		* to fill remaining dimensions from the SVD of the doubly-deflated space.
25		*
26		* Caller responsibilities
27		*
28		* - Build the model matrix (`X_raw`) from the data frame, including dummy
29		* variables for categorical predictors and any desired interaction terms.
30		* No formula parsing is done here.
31		* - Identify `x1_cols`: the 0-based column indices in `X_raw` that correspond to
32		* the target variable (these are unpenalized in the Lasso).
33		* - Encode categorical targets as 0-based integer codes in `x_target`.
34		* - Set `x_n_groups` to the number of distinct groups when `x_categorical=true`.
35		*
36		* The R wrapper (`rENA::ena.rotate.by.generalized`) handles these steps and
37		* calls `libqe::generalized_means_rotation()` with the prepared matrices.
38		*/
39
40		#ifndef LIBQE_GENERALIZED_ROTATION_HPP
41		#define LIBQE_GENERALIZED_ROTATION_HPP
42
43		#include "linalg_fallback.hpp" // qe::linalg::eig_sym (works w/o LAPACK)
44		#include "rotation.hpp" // ena_svd, complete_rotation, RotationResult
45		#include "lasso.hpp" // lasso_x1_contribution
46
47		#include <armadillo>
48		#include <stdexcept>
49		#include <string>
50		#include <vector>
51
52		namespace qe {
53
54		/// @name Between-group scatter
55		/// @{
56
57		/**
58		* @brief Compute the between-group scatter matrix S_B.
59		*
60		* @details
61		* Computes S_B = Σ_g n_g * (μ_g − μ)(μ_g − μ)' where μ is the grand mean.
62		*
63		* @note Mirrors rENA's `compute_SB()` (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
64		*
65		* @param[in] V n×p matrix of ENA points (or covariate-adjusted contribution).
66		* @param[in] labels n×1 0-based integer group labels.
67		* @param[in] n_groups Number of distinct groups.
68		*
69		* @returns p×p symmetric between-group scatter matrix.
70		*/
71	4x	inline arma::mat between_group_scatter(
72		const arma::mat& V,
73		const arma::uvec& labels,
74		arma::uword n_groups
75		) {
76	4x	const arma::uword p = V.n_cols;
77	4x	arma::rowvec mu = arma::mean(V, 0);
78	4x	arma::mat SB(p, p, arma::fill::zeros);
79
80	12x	for (arma::uword g = 0; g < n_groups; ++g) {
81	8x	arma::uvec idx = arma::find(labels == g);
82	8x	if (idx.is_empty()) continue;
83	21x	arma::vec diff = (arma::mean(V.rows(idx), 0) - mu).t();
84	14x	SB += static_cast<double>(idx.n_elem) * diff * diff.t();
85		}
86	8x	return SB;
87		}
88
89		/// @}
90
91		/// @name Rotation direction
92		/// @{
93
94		/**
95		* @brief Compute the unit rotation direction from a covariate-adjusted contribution matrix.
96		*
97		* @details
98		* Numeric target:
99		* Centres the target, then computes β = (t_c' t_c)^{-1} t_c' Vx (OLS slope from
100		* lm(Vx ~ target)). Mirrors R: `model = lm(Vx ~ target); beta = coefficients[2,]`.
101		*
102		* Categorical target:
103		* Computes the between-group scatter matrix S_B and returns its leading
104		* eigenvector (the direction of maximum between-group variance).
105		* Mirrors R: `sb = compute_SB(Vx, target); r = svd(sb)$v[,1]`.
106		*
107		* @note Mirrors rENA's `gmr()` direction computation
108		* (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
109		*
110		* @param[in] Vx n×q covariate-adjusted contribution matrix.
111		* @param[in] target n×1 target variable (numeric float, or 0-based integer codes).
112		* @param[in] categorical `true` if the target is categorical (uses between-group scatter);
113		* `false` for a numeric target (uses OLS slope).
114		* @param[in] n_groups Number of distinct groups; only used when `categorical` is `true`.
115		*
116		* @returns Unit vector (p×1) representing the rotation direction.
117		*
118		* @throws std::runtime_error If `categorical` is `true` and `n_groups < 2`.
119		* @throws std::runtime_error If the numeric target has zero variance after centering.
120		* @warning Throws `std::runtime_error` if the resulting direction has zero norm,
121		* which can occur when the Lasso-adjusted contribution collapses to zero.
122		*/
123	15x	inline arma::vec gmr_direction(
124		const arma::mat& Vx, // n×q covariate-adjusted contribution
125		const arma::vec& target, // n×1 (numeric float, or 0-based int codes)
126		bool categorical,
127		arma::uword n_groups = 0
128		) {
129	15x	arma::vec r;
130
131	15x	if (categorical) {
132	4x	if (n_groups < 2)
133		throw std::runtime_error(
134	!	"gmr_direction: x_n_groups must be >= 2 for categorical target");
135	4x	arma::uvec labels = arma::conv_to<arma::uvec>::from(target);
136	4x	arma::mat SB = between_group_scatter(Vx, labels, n_groups);
137
138	4x	arma::vec eigval; arma::mat eigvec;
139	4x	qe::linalg::eig_sym(eigval, eigvec, SB); // ascending order
140	8x	r = eigvec.col(eigvec.n_cols - 1); // largest eigenvalue → last col
141		} else {
142		// Centre target to get the OLS slope (mirrors R's lm() intercept handling)
143	22x	arma::vec tc = target - arma::mean(target);
144	11x	double tTt = arma::dot(tc, tc);
145	11x	if (tTt < 1e-12)
146		throw std::runtime_error(
147	!	"gmr_direction: target has zero variance after centering");
148	11x	arma::rowvec beta = (tc.t() * Vx) / tTt; // 1×q slope vector
149	11x	r = beta.t();
150		}
151
152	15x	double norm = arma::norm(r);
153	15x	if (norm < 1e-12)
154	!	throw std::runtime_error("gmr_direction: resulting direction has zero norm");
155	30x	return r / norm;
156		}
157
158		/// @}
159
160		/**
161		* @brief Result of one GMR axis step.
162		*/
163		struct GMRStepResult {
164		arma::vec direction; ///< Unit vector in connection space (p×1).
165		};
166
167		/// @name GMR axis step
168		/// @{
169
170		/**
171		* @brief Compute the rotation direction for one axis of the Generalized Means Rotation.
172		*
173		* @details
174		* Applies optional row subsetting, performs Lasso-based covariate adjustment
175		* (or a simple OLS projection when all columns are target columns), then
176		* delegates to `gmr_direction()` to produce a unit direction vector.
177		*
178		* When `x1_cols.n_elem >= p` (all columns are target columns and there are no
179		* penalized covariates), the function falls back to a simple OLS projection:
180		* Vx = fitted values from lm(V ~ t).
181		*
182		* @note Mirrors rENA's `gmr()` step logic
183		* (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
184		*
185		* @param[in] V n×q matrix of ENA points (full data; subset applied internally).
186		* @param[in] X_model n×p model matrix (caller-built; target cols + covariate cols).
187		* @param[in] target n×1 target variable (raw float or 0-based integer codes).
188		* @param[in] x1_cols 0-based indices of target columns in `X_model` (unpenalized in Lasso).
189		* @param[in] categorical `true` if the target is categorical.
190		* @param[in] n_groups Number of distinct groups (only used when `categorical` is `true`).
191		* @param[in] subset Optional 0-based row indices to use; empty means use all rows.
192		* @param[in] n_lambda Length of the Lasso lambda path (forwarded to `lasso_x1_contribution`).
193		* @param[in] k_folds Number of cross-validation folds (forwarded to `lasso_x1_contribution`).
194		* @param[in] lasso_eps lambda_min = lasso_eps * lambda_max (forwarded to `lasso_x1_contribution`).
195		*
196		* @returns `GMRStepResult` containing the unit direction vector.
197		*
198		* @throws std::runtime_error Propagated from `gmr_direction()` if the direction
199		* is degenerate or the target has zero variance.
200		*/
201	15x	inline GMRStepResult gmr_step(
202		const arma::mat& V,
203		const arma::mat& X_model,
204		const arma::vec& target,
205		const arma::uvec& x1_cols,
206		bool categorical,
207		arma::uword n_groups,
208		const arma::uvec& subset,
209		int n_lambda = 50,
210		int k_folds = 5,
211		double lasso_eps = 0.01
212		) {
213	15x	const arma::uword n = V.n_rows;
214	15x	const arma::uword p = X_model.n_cols;
215
216		// ── Select rows ──────────────────────────────────────────────────────────
217	15x	arma::uvec rows = subset.is_empty()
218	15x	? arma::regspace<arma::uvec>(0, n - 1)
219	15x	: subset;
220
221	15x	const arma::mat V_sub = V.rows(rows);
222	15x	const arma::mat X_sub = X_model.rows(rows);
223	30x	const arma::vec t_sub = target.rows(rows);
224
225		// ── Lasso-adjusted contribution Vx ───────────────────────────────────────
226	15x	arma::mat Vx_sub;
227	15x	if (x1_cols.n_elem >= p) {
228		// All columns are target columns (no penalized covariates):
229		// fall back to simple OLS projection (Vx = fitted values from lm(V~t))
230	30x	arma::vec tc = t_sub - arma::mean(t_sub);
231	15x	double tTt = arma::dot(tc, tc);
232	15x	if (tTt > 1e-12) {
233	14x	arma::rowvec b1 = (tc.t() * V_sub) / tTt;
234	14x	Vx_sub = tc * b1;
235		} else {
236	1x	Vx_sub.zeros(V_sub.n_rows, V_sub.n_cols);
237		}
238		} else {
239	!	arma::vec pf(p, arma::fill::ones);
240	!	for (arma::uword j : x1_cols) pf(j) = 0.0;
241
242	!	Vx_sub = lasso_x1_contribution(
243	!	X_sub, V_sub, x1_cols, pf, n_lambda, k_folds, lasso_eps);
244		}
245
246		// ── Rotation direction ───────────────────────────────────────────────────
247	15x	arma::vec direction = gmr_direction(Vx_sub, t_sub, categorical, n_groups);
248
249	30x	return {direction};
250		}
251
252		/// @}
253
254		/**
255		* @brief Parameters for `generalized_means_rotation()`.
256		*
257		* @details
258		* Groups all inputs needed to specify both the X and Y rotation axes, along
259		* with Lasso tuning knobs. The Y axis is optional: when `has_y` is `false`
260		* the second axis is taken from the leading SVD of the x-deflated space.
261		*/
262		struct GeneralizedRotationParams {
263		/// @name X axis (required)
264		/// @{
265		arma::mat x_model_matrix; ///< n×p model matrix for the X axis.
266		arma::vec x_target; ///< n×1 target variable (raw float or 0-based integer codes).
267		arma::uvec x1_cols; ///< 0-based target column indices in `x_model_matrix`.
268		bool x_categorical = false; ///< `true` if the X target is categorical.
269		arma::uword x_n_groups = 0; ///< Number of distinct groups (used when `x_categorical` is `true`).
270		arma::uvec x_subset; ///< Optional row subset (0-based); empty means use all rows.
271		/// @}
272
273		/// @name Y axis (optional)
274		/// @{
275		/// When `has_y` is `false`, the Y axis is taken from the leading SVD of the x-deflated space.
276		bool has_y = false; ///< `true` if a Y-axis GMR target is provided.
277		arma::mat y_model_matrix; ///< n×p model matrix for the Y axis.
278		arma::vec y_target; ///< n×1 target variable for the Y axis.
279		arma::uvec y1_cols; ///< 0-based target column indices in `y_model_matrix`.
280		bool y_categorical = false; ///< `true` if the Y target is categorical.
281		arma::uword y_n_groups = 0; ///< Number of distinct groups for the Y axis.
282		/// The Y axis always uses all rows (no subset parameter).
283		/// @}
284
285		/// @name Lasso tuning
286		/// @{
287		int n_lambda = 50; ///< Length of the lambda path.
288		int k_folds = 5; ///< Number of cross-validation folds.
289		double lasso_eps = 0.01; ///< lambda_min = lasso_eps * lambda_max.
290		/// @}
291		};
292
293		/// @name Generalized Means Rotation
294		/// @{
295
296		/**
297		* @brief Full Generalized Means Rotation (GMR) producing a complete q×q rotation matrix.
298		*
299		* @details
300		* C++/Armadillo implementation of `ena.rotate.by.generalized()`, matching the
301		* authoritative R implementation in commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901
302		* by Zhiqiang Cai.
303		*
304		* Steps performed:
305		*
306		* 1. X axis via GMR — calls `gmr_step()` on V with the X-axis parameters.
307		* 2. Deflate V — computes defA = V − V·x·x' for subsequent Y-axis work.
308		* 3. Secondary axis x1 — when the X target is categorical with ≥ 2 groups,
309		* computes x1 = normalize(mean(V[group0]) − mean(V[group1])) using the
310		* original V (not defA) and the full-length target (all n rows).
311		* x1 is used only to orthogonalise y_vector; it does not deflate defA.
312		* There is no SVD fallback: x1 stays empty if the condition is not met.
313		* 4. Y axis — either GMR on defA (when `params.has_y` is `true`) or the
314		* leading SVD direction of defA.
315		* 5. Orthogonalise y_vector — mirrors R:
316		* `y_vector <- y_vector - (t(y_vector) %% x_vector) x_vector`
317		* and, when x1 is available,
318		* `y_vector <- y_vector - (t(y_vector) %% x1) x1`,
319		* followed by safe normalisation.
320		* 6. Complete with SVD — double-deflates V by both x_vector and y_vector,
321		* then calls `complete_rotation()` to fill remaining dimensions.
322		*
323		* @note Mirrors rENA's `ena.rotate.by.generalized()`
324		* (commit 46776a1981a90b3a3b2861ed1010e9dbb7acf901).
325		*
326		* @param[in] V n×q matrix of ENA points (line weights; caller normalises/
327		* centres upstream).
328		* @param[in] params Fully populated `GeneralizedRotationParams` struct.
329		*
330		* @returns `RotationResult` with:
331		* - `rotation` q×q full rotation matrix (column j = axis j).
332		* - `eigenvalues` q×1 variance per SVD-filled axis (named axes carry 0).
333		* - `column_names` "GMR1", "GMR2" or "SVD2", "SVD3", …, "SVD{q}".
334		*
335		* @throws std::runtime_error If `gmr_direction()` produces a degenerate direction
336		* for the X or Y axis.
337		* @throws std::runtime_error If y_vector has zero norm after orthogonalisation
338		* (can happen when defA is rank-deficient or perfectly aligned with x_vector).
339		*/
340	13x	inline RotationResult generalized_means_rotation(
341		const arma::mat& V,
342		const GeneralizedRotationParams& params
343		) {
344		// ── Step 1: X axis via GMR ────────────────────────────────────────────
345		GMRStepResult x_gmr = gmr_step(
346		V,
347	13x	params.x_model_matrix,
348	13x	params.x_target,
349	13x	params.x1_cols,
350	13x	params.x_categorical,
351	13x	params.x_n_groups,
352	13x	params.x_subset,
353	13x	params.n_lambda,
354	13x	params.k_folds,
355	13x	params.lasso_eps);
356
357	13x	const arma::vec x_vector = x_gmr.direction;
358
359		// ── Step 2: Deflate V by x_vector ─────────────────────────────────────
360		// defA is used only for y_vector computation; x1 uses original V.
361	26x	const arma::mat defA = V - V * x_vector * x_vector.t();
362
363		// ── Step 3: Secondary axis x1 ─────────────────────────────────────────
364		// Mirrors R: x1 = normalize(mean(V[group0]) - mean(V[group1])).
365		//
366		// Key points matching the authoritative R:
367		// - Uses original V (not defA)
368		// - Uses the full-length target (all n rows), not just the subset
369		// - Groups are always 0 (first) and 1 (second) in 0-based encoding
370		// - NO fallback: x1 stays empty when categorical/2-group condition fails
371		// - x1 does NOT deflate defA; it is only used to orthogonalise y_vector
372	13x	arma::vec x1;
373
374	13x	if (params.x_categorical && params.x_n_groups >= 2) {
375	4x	arma::uvec labels = arma::conv_to<arma::uvec>::from(params.x_target);
376	4x	arma::uvec g0 = arma::find(labels == 0);
377	4x	arma::uvec g1 = arma::find(labels == 1);
378
379		if (!g0.is_empty() && !g1.is_empty()) {
380	4x	arma::rowvec m0 = arma::mean(V.rows(g0), 0); // original V, not defA
381	4x	arma::rowvec m1 = arma::mean(V.rows(g1), 0);
382	4x	arma::vec diff = (m0 - m1).t();
383	4x	double dlen = arma::norm(diff);
384	8x	if (dlen > 1e-10) x1 = diff / dlen;
385		}
386		}
387
388		// ── Step 4: Y axis ────────────────────────────────────────────────────
389		// GMR on defA (deflated by x_vector only), or SVD of defA.
390	13x	arma::vec y_vector;
391	13x	std::string y_name = "SVD2";
392
393	13x	if (params.has_y) {
394		GMRStepResult y_gmr = gmr_step(
395		defA,
396	2x	params.y_model_matrix,
397	2x	params.y_target,
398	2x	params.y1_cols,
399	2x	params.y_categorical,
400	2x	params.y_n_groups,
401		{}, // no row subset for y axis
402	2x	params.n_lambda,
403	2x	params.k_folds,
404	2x	params.lasso_eps);
405	2x	y_vector = y_gmr.direction;
406	2x	y_name = "GMR2";
407		} else {
408	11x	RotationResult svd_r = ena_svd(defA);
409	11x	y_vector = svd_r.rotation.col(0);
410		}
411
412		// ── Step 5: Orthogonalise y_vector ────────────────────────────────────
413		// Mirrors R (authoritative commit):
414		// y_vector <- y_vector - (t(y_vector) %% x_vector) x_vector
415		// if (!is.null(x1)) y_vector <- y_vector - (t(y_vector) %% x1) x1
416		// y_vector <- safe_normalize(y_vector)
417	26x	y_vector -= arma::dot(y_vector, x_vector) * x_vector;
418	13x	if (!x1.is_empty())
419	8x	y_vector -= arma::dot(y_vector, x1) * x1;
420
421	13x	double yn = arma::norm(y_vector);
422	13x	if (yn < 1e-12)
423		throw std::runtime_error(
424	!	"generalized_means_rotation: y_vector has zero norm after orthogonalization");
425	13x	y_vector /= yn;
426
427		// ── Step 6: Combine and complete with SVD ─────────────────────────────
428		// Double-deflate the original data (mirrors R):
429		// defA <- A - A %% x %% t(x) - A %% y %% t(y)
430		// then pass to prcomp (equivalent: SVD of defA_final).
431		arma::mat defA_final = V
432	13x	- V * x_vector * x_vector.t()
433	26x	- V * y_vector * y_vector.t();
434
435	13x	arma::mat named_axes = arma::join_rows(x_vector, y_vector);
436	65x	return complete_rotation(defA_final, named_axes, {"GMR1", y_name});
437		}
438
439		/// @}
440
441		} // namespace qe
442		#endif // LIBQE_GENERALIZED_ROTATION_HPP

1		/**
2		* @file lasso.hpp
3		* @brief Coordinate-descent Lasso for multivariate response with k-fold CV.
4		*
5		* Mirrors `glmnet(x, y, family = "mgaussian", penalty.factor = ...)` with
6		* k-fold cross-validation to select lambda, and warm-starts along the lambda
7		* path for efficiency.
8		*
9		* The public entry point is qe::lasso_x1_contribution(), which returns an
10		* n×q matrix of fitted values attributable to the "target" predictors at the
11		* CV-selected lambda. It is used by `generalized_means_rotation` to isolate
12		* the target variable's contribution to the ENA point space after controlling
13		* for covariates.
14		*
15		* Penalty factors (`pf`):
16		* - `0.0` — predictor is always included (forced in; no shrinkage)
17		* - `1.0` — standard L1 penalty
18		*
19		* The caller is responsible for building the model matrix (main effects and/or
20		* interactions); this function accepts a numeric matrix, not a formula.
21		*/
22
23		#ifndef LIBQE_LASSO_HPP
24		#define LIBQE_LASSO_HPP
25
26		#include <armadillo>
27		#include <cmath>
28		#include <limits>
29		#include <stdexcept>
30
31		namespace qe {
32
33		/**
34		* @brief Soft-threshold (shrinkage) operator used by coordinate descent.
35		*
36		* @details Computes the scalar soft-threshold function:
37		* \f$ S(z,\gamma) = \operatorname{sign}(z)\,\max(\|z\|-\gamma,\,0) \f$
38		*
39		* @param[in] z Input value.
40		* @param[in] gamma Threshold amount (must be non-negative for meaningful use).
41		* @returns Soft-thresholded value; exactly zero when `\|z\| <= gamma`.
42		*/
43	!	inline double soft_threshold(double z, double gamma) noexcept {
44	!	return z > gamma ? z - gamma : z < -gamma ? z + gamma : 0.0;
45		}
46
47		/**
48		* @brief Single-response Lasso via cyclic coordinate descent at a fixed lambda.
49		*
50		* @details Fits a Lasso regression for a single response vector using cyclic
51		* coordinate descent (naive updates). The partial-residual update for
52		* predictor \f$j\f$ is:
53		* \f[
54		* z_j = \frac{X_j^\top r}{n} + \frac{X_j^\top X_j}{n}\,\beta_j
55		* \f]
56		* \f[
57		* \beta_j \leftarrow \frac{S\!\left(z_j,\;\lambda\,\mathit{pf}_j\right)}{X_j^\top X_j / n}
58		* \f]
59		* where \f$r = y - X\beta\f$ is the residual maintained incrementally.
60		* Predictors with \f$\mathit{pf}_j = 0\f$ skip the soft-threshold and receive
61		* the unconstrained OLS update instead.
62		*
63		* @param[in] X n×p column-standardized predictor matrix.
64		* @param[in] y n×1 response vector (centering recommended but not required).
65		* @param[in] lambda Regularization strength, on the standardized predictor scale.
66		* @param[in] pf p×1 penalty factors (0 = unpenalized / forced in, 1 = standard L1).
67		* @param[in] beta0 Warm-start coefficient vector; pass an empty `arma::vec{}`
68		* for a cold start from zero.
69		* @param[in] max_iter Maximum number of coordinate-descent passes (default 1000).
70		* @param[in] tol Convergence tolerance on the maximum coefficient change
71		* per pass (default 1e-7).
72		* @returns p×1 coefficient vector on the standardized predictor scale.
73		*/
74	!	inline arma::vec lasso_cd(
75		const arma::mat& X,
76		const arma::vec& y,
77		double lambda,
78		const arma::vec& pf,
79		arma::vec beta0 = {},
80		int max_iter = 1000,
81		double tol = 1e-7
82		) {
83	!	const arma::uword p = X.n_cols;
84	!	const double n = static_cast<double>(X.n_rows);
85
86		// Precompute X_j'X_j / n for each predictor
87	!	arma::vec xTx(p);
88	!	for (arma::uword j = 0; j < p; ++j)
89	!	xTx(j) = arma::dot(X.col(j), X.col(j)) / n;
90
91	!	arma::vec beta = beta0.is_empty()
92	!	? arma::vec(p, arma::fill::zeros)
93	!	: beta0;
94	!	arma::vec r = y - X * beta; // residual, maintained incrementally
95
96	!	for (int it = 0; it < max_iter; ++it) {
97	!	double max_delta = 0.0;
98
99	!	for (arma::uword j = 0; j < p; ++j) {
100	!	if (xTx(j) < 1e-14) continue; // skip zero-variance predictor
101
102		// Partial correlation statistic
103	!	double z = arma::dot(X.col(j), r) / n + xTx(j) * beta(j);
104
105	!	double b_new = (pf(j) < 1e-12)
106	!	? z / xTx(j) // forced in: OLS
107	!	: soft_threshold(z, lambda * pf(j)) / xTx(j); // L1
108
109	!	double delta = b_new - beta(j);
110	!	if (std::abs(delta) > max_delta) max_delta = std::abs(delta);
111	!	r -= X.col(j) * delta;
112	!	beta(j) = b_new;
113		}
114
115	!	if (max_delta < tol) break;
116		}
117	!	return beta;
118		}
119
120		/**
121		* @brief Compute the smallest lambda that drives all penalized coefficients to zero.
122		*
123		* @details For column-standardized `X` and column-centered `Yc`, lambda_max is:
124		* \f[
125		* \lambda_{\max} = \max_{\substack{j:\,\mathit{pf}_j > 0 \\ k}} \frac{\|X_j^\top Y_{c,k}\|}{n}
126		* \f]
127		* Above this value every penalized coefficient is exactly zero (unpenalized
128		* predictors are unaffected and remain in via OLS regardless of lambda).
129		*
130		* @param[in] X n×p column-standardized predictor matrix.
131		* @param[in] Yc n×q column-centered response matrix.
132		* @param[in] pf p×1 penalty factors; predictors with `pf(j) < 1e-12` are skipped.
133		* @returns Scalar lambda_max value.
134		*/
135	!	inline double lasso_lambda_max(
136		const arma::mat& X, // column-standardized
137		const arma::mat& Yc, // column-centered responses
138		const arma::vec& pf
139		) {
140	!	const double n = static_cast<double>(X.n_rows);
141	!	double lmax = 0.0;
142	!	for (arma::uword j = 0; j < X.n_cols; ++j) {
143	!	if (pf(j) < 1e-12) continue; // skip unpenalized predictors
144	!	for (arma::uword k = 0; k < Yc.n_cols; ++k) {
145	!	double v = std::abs(arma::dot(X.col(j), Yc.col(k))) / n;
146	!	if (v > lmax) lmax = v;
147		}
148		}
149	!	return lmax;
150		}
151
152		/**
153		* @brief Cross-validated multivariate Lasso; returns fitted values for target columns only.
154		*
155		* @details Fits a multivariate Lasso (one independent single-response problem per
156		* response column) along a log-spaced lambda path from lambda_max down to
157		* `eps * lambda_max`. Lambda is selected by k-fold cross-validation
158		* (mean squared error, averaged over folds and response columns), and the
159		* model is refit on the full data at the selected lambda using warm-starts
160		* along the path.
161		*
162		* Preprocessing performed internally:
163		* - `X_raw` is column-standardized to zero mean and unit variance (zero-variance
164		* columns are left as-is to avoid division by zero).
165		* - `Y` is column-centered to absorb the intercept.
166		* - Coefficients are unstandardized before the contribution is computed.
167		*
168		* Fold assignment: row \f$i\f$ is assigned to fold \f$i \bmod k\f$
169		* (deterministic, order-dependent).
170		*
171		* Return value: The n×q matrix \f$X_{\text{raw},x_1} \hat{B}_{x_1}\f$
172		* represents the portion of `Y` (mean-removed) that is linearly explained by
173		* the target predictors after penalizing covariate coefficients. A zero
174		* matrix is returned when the target predictors carry no signal at any lambda
175		* (e.g., the target is constant after subsetting). Constant shifts cancel
176		* in the downstream between-group scatter (categorical) and slope regression
177		* (numeric), so no `ymu` adjustment is applied to the return value.
178		*
179		* @param[in] X_raw n×p raw model matrix built by the caller (main effects
180		* and/or interactions); no formula parsing is done here.
181		* @param[in] Y n×q ENA point matrix (multivariate response).
182		* @param[in] x1_cols 0-based column indices in `X_raw` that belong to the
183		* target variable; these receive `pf = 0` (always included).
184		* @param[in] pf p×1 penalty factors: 0 for `x1_cols`, 1 elsewhere.
185		* The caller constructs this; non-standard weightings are
186		* supported.
187		* @param[in] n_lambda Length of the lambda path (default 50).
188		* @param[in] k_folds Number of cross-validation folds (default 5; clamped to
189		* `[2, n/2]`).
190		* @param[in] eps Ratio `lambda_min / lambda_max`; controls how far down
191		* the path to search (default 0.01).
192		* @returns n×q matrix of fitted values attributable to `x1_cols` at the
193		* CV-selected lambda (lambda.min), on the original `Y` scale.
194		*/
195	!	inline arma::mat lasso_x1_contribution(
196		const arma::mat& X_raw,
197		const arma::mat& Y,
198		const arma::uvec& x1_cols,
199		const arma::vec& pf,
200		int n_lambda = 50,
201		int k_folds = 5,
202		double eps = 0.01
203		) {
204	!	const arma::uword n = X_raw.n_rows;
205	!	const arma::uword p = X_raw.n_cols;
206	!	const arma::uword q = Y.n_cols;
207
208		// Guard: need at least 2 folds and 2 observations per fold
209	!	k_folds = std::max(2, std::min(k_folds, static_cast<int>(n) / 2));
210
211		// ── Standardize X (μ=0, σ=1); guard zero-variance predictors ────────────
212	!	arma::rowvec xmu = arma::mean(X_raw, 0);
213	!	arma::rowvec xstd = arma::stddev(X_raw, 0, 0);
214	!	xstd.elem(arma::find(xstd < 1e-12)).ones(); // avoid divide-by-zero
215	!	arma::mat X = (X_raw.each_row() - xmu).each_row() / xstd;
216
217		// ── Center Y (absorbs intercept into the standardized-scale fit) ─────────
218	!	arma::rowvec ymu = arma::mean(Y, 0);
219	!	arma::mat Yc = Y.each_row() - ymu;
220
221		// ── Build lambda path: lambda_max → lambda_min on log scale ─────────────
222	!	double lmax = lasso_lambda_max(X, Yc, pf);
223	!	if (lmax < 1e-12) // target uncorrelated with Y after standardization
224	!	return arma::mat(n, q, arma::fill::zeros);
225
226	!	arma::vec lambdas = arma::exp(
227	!	arma::linspace(std::log(lmax),
228		std::log(eps * lmax),
229	!	n_lambda));
230
231		// ── Stratified k-fold CV (fold assignment: row i → fold i % k_folds) ────
232	!	arma::uvec fold_id(n);
233	!	for (arma::uword i = 0; i < n; ++i)
234	!	fold_id(i) = i % static_cast<arma::uword>(k_folds);
235
236	!	arma::vec cv_err(n_lambda, arma::fill::zeros);
237
238	!	for (int fold = 0; fold < k_folds; ++fold) {
239	!	arma::uvec tr = arma::find(fold_id != static_cast<arma::uword>(fold));
240	!	arma::uvec va = arma::find(fold_id == static_cast<arma::uword>(fold));
241
242	!	const arma::mat X_tr = X.rows(tr), X_va = X.rows(va);
243	!	const arma::mat Y_tr = Yc.rows(tr), Y_va = Yc.rows(va);
244	!	const double n_va_q = static_cast<double>(va.n_elem * q);
245
246		// Warm-start along path for this fold (start cold at lambda_max)
247	!	arma::mat bw(p, q, arma::fill::zeros);
248	!	for (int l = 0; l < n_lambda; ++l) {
249	!	arma::mat b(p, q);
250	!	for (arma::uword k = 0; k < q; ++k)
251	!	b.col(k) = lasso_cd(X_tr, Y_tr.col(k), lambdas(l), pf, bw.col(k));
252	!	bw = b;
253
254	!	arma::mat err = Y_va - X_va * b;
255	!	cv_err(l) += arma::accu(err % err) / n_va_q;
256		}
257		}
258	!	cv_err /= static_cast<double>(k_folds);
259
260		// ── Refit on full data at lambda_min, warming along the path ─────────────
261	!	arma::uword best = cv_err.index_min();
262	!	arma::mat bfull(p, q, arma::fill::zeros);
263	!	for (arma::uword l = 0; l <= best; ++l) {
264	!	arma::mat b(p, q);
265	!	for (arma::uword k = 0; k < q; ++k)
266	!	b.col(k) = lasso_cd(X, Yc.col(k), lambdas(l), pf, bfull.col(k));
267	!	bfull = b;
268		}
269
270		// ── Unstandardize: beta_orig[j] = beta_std[j] / xstd[j] ─────────────────
271	!	for (arma::uword j = 0; j < p; ++j)
272	!	bfull.row(j) /= xstd(j);
273
274		// ── Return fitted contribution from x1 columns only ──────────────────────
275		// This is the portion of Y (mean-removed) linearly explained by the target
276		// predictors after covariate adjustment. Constant shifts cancel in both
277		// the between-group scatter (categorical) and slope regression (numeric)
278		// that follow, so no ymu adjustment is needed here.
279	!	return X_raw.cols(x1_cols) * bfull.rows(x1_cols);
280		}
281
282		} // namespace qe
283		#endif // LIBQE_LASSO_HPP

1		/** @file linalg_fallback.hpp
2		* @brief Thin LAPACK-free implementations of SVD, QR, and symmetric
3		* eigendecomposition for use in WASM / no-LAPACK builds.
4		*
5		* When @c ARMA_USE_LAPACK is defined these wrappers delegate directly to the
6		* standard Armadillo routines (@c arma::svd, @c arma::qr, @c arma::eig_sym),
7		* so there is zero overhead in normal builds. When LAPACK is not available
8		* (e.g. Emscripten WASM builds) the wrappers fall back to pure C++
9		* implementations that work for the small matrices typical in ENA
10		* (p ≤ ~100 connection dimensions).
11		*
12		* All routines live in namespace @c qe::linalg and operate on @c arma::mat
13		* (double precision).
14		*
15		* Fallback algorithms:
16		* - SVD — one-sided Jacobi iterations on \f$A^T\f$ (Demmel & Veselić 1992)
17		* - QR — modified Gram-Schmidt
18		* - eig_sym — cyclic Jacobi (classical Jacobi sweep)
19		*/
20
21		#ifndef LIBQE_LINALG_FALLBACK_HPP
22		#define LIBQE_LINALG_FALLBACK_HPP
23
24		#include <armadillo>
25		#include <cmath>
26		#include <limits>
27
28		namespace qe {
29		namespace linalg {
30
31		/// @cond INTERNAL
32		// ── helpers ───────────────────────────────────────────────────────────────────
33
34		namespace detail {
35
36		/// @brief Modified Gram-Schmidt orthonormalization applied in-place.
37		///
38		/// Columns of @p Q are successively orthonormalized against all previously
39		/// processed columns. A column whose norm falls below 1e-14 after projection
40		/// is left unchanged (not zeroed) to avoid introducing NaNs.
41		///
42		/// @param Q Matrix whose columns are orthonormalized in-place (modified).
43		inline void mgs(arma::mat& Q) {
44		const arma::uword p = Q.n_cols;
45		for (arma::uword j = 0; j < p; ++j) {
46		for (arma::uword i = 0; i < j; ++i)
47		Q.col(j) -= arma::dot(Q.col(i), Q.col(j)) * Q.col(i);
48		double n = arma::norm(Q.col(j));
49		if (n > 1e-14) Q.col(j) /= n;
50		}
51		}
52
53		/// @brief One cyclic Jacobi sweep on a p×p symmetric matrix.
54		///
55		/// Iterates over all super-diagonal index pairs @c (q,r) and applies a
56		/// two-sided Jacobi rotation that annihilates @c A(q,r). The same rotation
57		/// is accumulated in @p V so that, starting with @c V = I, the full sequence
58		/// of sweeps yields @c V = eigenvector matrix.
59		///
60		/// @param A Symmetric p×p matrix, updated in-place toward diagonal form.
61		/// @param V Rotation accumulator (p×p); initialize to identity to recover
62		/// eigenvectors.
63		///
64		/// @note Algorithm follows the classical cyclic-by-rows Jacobi scheme.
65		/// Off-diagonal entries no larger than 1e-14 × (\|a_qq\| + \|a_rr\|) are
66		/// skipped for numerical stability. The `<=` (not `<`) guard correctly
67		/// handles the all-zero case (a_qq = a_rr = a_qr = 0) that would
68		/// otherwise produce NaN via 0/0 in the theta computation.
69		inline void jacobi_sweep(arma::mat& A, arma::mat& V) {
70		const arma::uword p = A.n_rows;
71		for (arma::uword q = 0; q < p - 1; ++q) {
72		for (arma::uword r = q + 1; r < p; ++r) {
73		double aqq = A(q, q), arr = A(r, r), aqr = A(q, r);
74		if (std::abs(aqr) <= 1e-14 * (std::abs(aqq) + std::abs(arr)))
75		continue;
76		double theta = (arr - aqq) / (2.0 * aqr);
77		double t = (theta >= 0.0)
78		? 1.0 / (theta + std::sqrt(1.0 + theta * theta))
79		: 1.0 / (theta - std::sqrt(1.0 + theta * theta));
80		double c = 1.0 / std::sqrt(1.0 + t * t); ///< cosine of Jacobi rotation angle
81		double s = t * c; ///< sine of Jacobi rotation angle
82		// Apply Jacobi rotation to A (symmetric update)
83		double new_qq = aqq - t * aqr;
84		double new_rr = arr + t * aqr;
85		A(q, q) = new_qq; A(r, r) = new_rr; A(q, r) = A(r, q) = 0.0;
86		for (arma::uword k = 0; k < p; ++k) {
87		if (k == q \|\| k == r) continue;
88		double akq = A(k, q), akr = A(k, r);
89		A(k, q) = A(q, k) = c * akq - s * akr;
90		A(k, r) = A(r, k) = s * akq + c * akr;
91		}
92		// Accumulate rotation in V
93		for (arma::uword k = 0; k < p; ++k) {
94		double vkq = V(k, q), vkr = V(k, r);
95		V(k, q) = c * vkq - s * vkr;
96		V(k, r) = s * vkq + c * vkr;
97		}
98		}
99		}
100		}
101
102		} // namespace detail
103
104		/// @endcond
105
106		/// @name Eigendecomposition
107		/// @{
108
109		// ── eig_sym ───────────────────────────────────────────────────────────────────
110
111		/// @brief Eigendecomposition of a real symmetric matrix.
112		///
113		/// Computes eigenvalues and eigenvectors of the symmetric p×p matrix @p S and
114		/// returns them sorted in ascending eigenvalue order.
115		///
116		/// @param[out] eigval p×1 vector of eigenvalues, ascending.
117		/// @param[out] eigvec p×p matrix whose columns are the corresponding
118		/// orthonormal eigenvectors.
119		/// @param[in] S Real symmetric p×p input matrix (not modified).
120		///
121		/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::eig_sym
122		/// directly. Without LAPACK the fallback runs up to @c 50*p cyclic
123		/// Jacobi sweeps; convergence is guaranteed for well-conditioned
124		/// matrices but may be slow for p > 200.
125		///
126		/// @note Fallback algorithm: classical cyclic-by-rows Jacobi iteration.
127		/// Convergence is declared when the Frobenius norm of the strictly
128		/// off-diagonal part drops below 1e-13.
129	4x	inline void eig_sym(arma::vec& eigval, arma::mat& eigvec, const arma::mat& S) {
130		#if defined(ARMA_USE_LAPACK)
131	4x	arma::eig_sym(eigval, eigvec, S);
132		#else
133		const arma::uword p = S.n_rows;
134		arma::mat A = S; ///< working copy of S
135		eigvec.eye(p, p); ///< accumulate rotations; starts as identity
136
137		for (int sweep = 0; sweep < 50 * static_cast<int>(p); ++sweep) {
138		// Check off-diagonal convergence
139		double off = 0.0;
140		for (arma::uword q = 0; q < p - 1; ++q)
141		for (arma::uword r = q + 1; r < p; ++r)
142		off += A(q, r) * A(q, r);
143		if (std::sqrt(off) < 1e-13) break;
144		detail::jacobi_sweep(A, eigvec);
145		}
146		eigval = A.diag();
147
148		// Sort ascending
149		arma::uvec idx = arma::sort_index(eigval);
150		eigval = eigval(idx);
151		eigvec = eigvec.cols(idx);
152		#endif
153		}
154
155		/// @}
156
157		/// @name QR Factorization
158		/// @{
159
160		// ── qr_full ───────────────────────────────────────────────────────────────────
161
162		/// @brief Full (square) QR factorization of an n×p matrix.
163		///
164		/// Produces a square n×n orthogonal factor @p Q and an n×p upper-trapezoidal
165		/// factor @p R such that @c A = Q*R. This matches the convention of
166		/// @c arma::qr(), which returns the full (non-economy) @p Q.
167		///
168		/// @param[out] Q n×n orthogonal matrix.
169		/// @param[out] R n×p upper-trapezoidal matrix.
170		/// @param[in] A n×p input matrix (not modified).
171		///
172		/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::qr directly.
173		/// Without LAPACK the fallback constructs the full ONB by seeding
174		/// columns beyond @c A.n_cols with standard basis vectors and then
175		/// applying modified Gram-Schmidt; numerical quality degrades for
176		/// nearly rank-deficient inputs.
177		///
178		/// @note The caller (@c orthogonal_svd) depends on @p Q being n×n so that
179		/// @c Q.cols(k, n-1) spans the null space of @c A^T.
180	8x	inline void qr_full(arma::mat& Q, arma::mat& R, const arma::mat& A) {
181		#if defined(ARMA_USE_LAPACK)
182	8x	arma::qr(Q, R, A);
183		#else
184		// Build a full n×n orthogonal matrix.
185		// Step 1: thin QR via modified Gram-Schmidt (gives first k=A.n_cols columns).
186		// Step 2: extend to a full ONB using random vectors + re-orthogonalisation.
187		const arma::uword n = A.n_rows, p = A.n_cols;
188
189		// Start with all n columns of A padded with random vectors if n > p.
190		arma::mat Qfull(n, n, arma::fill::zeros);
191		// Copy A's columns first
192		for (arma::uword j = 0; j < p && j < n; ++j)
193		Qfull.col(j) = A.col(j);
194		// Fill remaining columns with standard basis vectors (or random fallback)
195		for (arma::uword j = p; j < n; ++j) {
196		Qfull(j, j) = 1.0; ///< standard basis vector e_j used as seed
197		}
198		// Full MGS orthonormalization
199		detail::mgs(Qfull);
200		Q = Qfull;
201
202		// R is n×p (upper trapezoidal); only first p rows are non-zero
203		R = Q.t() * A;
204		// Zero the strictly lower-triangular part of R[0:p, 0:p]
205		for (arma::uword j = 0; j < p; ++j)
206		for (arma::uword i = j + 1; i < n; ++i)
207		R(i, j) = 0.0;
208		#endif
209		}
210
211		/// @}
212
213		/// @name Singular Value Decomposition
214		/// @{
215
216		// ── svd ───────────────────────────────────────────────────────────────────────
217
218		/// @brief Full SVD: @c A ≈ U * diag(s) * V', singular values descending.
219		///
220		/// Decomposes the n×p matrix @p A into left singular vectors @p U, singular
221		/// values @p s, and right singular vectors @p V.
222		///
223		/// @param[out] U n×min(n,p) matrix of left singular vectors.
224		/// @param[out] s min(n,p)×1 vector of singular values, descending.
225		/// @param[out] V p×p matrix of right singular vectors (all p columns).
226		/// @param[in] A n×p input matrix (not modified).
227		///
228		/// @warning With @c ARMA_USE_LAPACK defined this calls @c arma::svd directly.
229		/// Without LAPACK the fallback computes @c A^T*A, calls @c eig_sym,
230		/// and derives @p U via @c U[:,j] = A*V[:,j]/s_j. Columns with
231		/// @c s_j < 1e-14 are zeroed. Accuracy is governed by the
232		/// conditioning of @c A^T*A (squares the condition number of @p A).
233		///
234		/// @note Fallback algorithm: eigendecomposition of \f$A^T A\f$ via cyclic
235		/// Jacobi (Demmel & Veselić 1992). Suitable for the small, moderately
236		/// conditioned matrices typical in ENA (p ≤ ~100).
237	40x	inline void svd(arma::mat& U, arma::vec& s, arma::mat& V, const arma::mat& A) {
238		#if defined(ARMA_USE_LAPACK)
239	40x	arma::svd(U, s, V, A);
240		#else
241		// One-sided Jacobi on A^T:
242		// We work with B = A^T (p×n). The right singular vectors of A are the
243		// left singular vectors of B (= columns of U_B after Jacobi on B^T B).
244		// We use the eigendecomposition of A^T * A to get V and s.
245		const arma::uword n = A.n_rows, p = A.n_cols;
246		const arma::uword k = std::min(n, p);
247
248		arma::mat AtA = A.t() * A; ///< p×p symmetric Gram matrix
249		arma::vec eigval;
250		arma::mat eigvec;
251		eig_sym(eigval, eigvec, AtA);
252
253		// Eigenvalues from eig_sym are ascending; we want descending.
254		arma::uvec idx = arma::sort_index(eigval, "descend");
255		eigval = eigval(idx);
256		eigvec = eigvec.cols(idx);
257
258		// Singular values (non-negative square roots)
259		s.set_size(k);
260		for (arma::uword j = 0; j < k; ++j)
261		s(j) = std::sqrt(std::max(0.0, eigval(j)));
262
263		// V = eigvec (right singular vectors of A are eigvectors of A^T*A)
264		V = eigvec;
265
266		// U = A * V / s (left singular vectors, only first k columns needed)
267		U.set_size(n, k);
268		for (arma::uword j = 0; j < k; ++j) {
269		if (s(j) > 1e-14)
270		U.col(j) = A * V.col(j) / s(j);
271		else
272		U.col(j).zeros();
273		}
274		#endif
275		}
276
277		// ── svd_right ─────────────────────────────────────────────────────────────────
278
279		/// @brief Singular values and right singular vectors only (left vectors skipped).
280		///
281		/// A lighter-weight variant of @c svd() for callers that only need @p s and
282		/// @p V. Under LAPACK the full @c arma::svd is still called internally; the
283		/// savings come in WASM builds where computing @p U is avoided.
284		///
285		/// @param[out] s min(n,p)×1 vector of singular values, descending.
286		/// @param[out] V p×p matrix of right singular vectors.
287		/// @param[in] A n×p input matrix (not modified).
288		///
289		/// @warning With @c ARMA_USE_LAPACK defined this still invokes the full
290		/// @c arma::svd; the left-vector output is discarded.
291		inline void svd_right(arma::vec& s, arma::mat& V, const arma::mat& A) {
292		#if defined(ARMA_USE_LAPACK)
293		arma::mat U;
294		arma::svd(U, s, V, A);
295		#else
296		arma::mat U_unused;
297		svd(U_unused, s, V, A);
298		#endif
299		}
300
301		/// @}
302
303		/// @name Linear System Solve
304		/// @{
305
306		// ── solve_spd ─────────────────────────────────────────────────────────────────
307
308		/// @brief Solve @c A*x = b for symmetric positive (semi-)definite @p A.
309		///
310		/// Returns the n×k solution matrix @p x given the n×n system matrix @p A and
311		/// the n×k right-hand side @p b. Near-zero eigenvalues are handled gracefully,
312		/// making this a pseudo-inverse solve safe for rank-deficient Gram matrices.
313		///
314		/// @param[in] A Real symmetric positive (semi-)definite n×n matrix.
315		/// @param[in] b n×k right-hand side matrix.
316		/// @param[in] tol Relative threshold below which eigenvalues are treated as
317		/// zero. Eigenvalues with @c \|λ\| < tol * max(\|λ\|) are
318		/// inverted as zero (i.e. those directions are projected out).
319		/// Default: 1e-12.
320		/// @returns n×k solution matrix @p x satisfying @c A*x ≈ b.
321		///
322		/// @warning With @c ARMA_USE_LAPACK defined this delegates to
323		/// @c arma::solve(A, b, arma::solve_opts::fast), which uses a
324		/// Cholesky factorization and does not handle rank deficiency.
325		/// Without LAPACK the eigendecomposition-based pseudo-inverse is used,
326		/// which is safe for rank-deficient inputs but slower.
327		///
328		/// @note Without LAPACK: @c A = V * diag(λ) * V^T is computed via @c eig_sym;
329		/// the solution is @c x = V * diag(1/λ) * V^T * b with small λ skipped.
330	10x	inline arma::mat solve_spd(const arma::mat& A, const arma::mat& b,
331		double tol = 1e-12) {
332		#if defined(ARMA_USE_LAPACK)
333	10x	return arma::solve(A, b, arma::solve_opts::fast);
334		#else
335		arma::vec eigval;
336		arma::mat eigvec;
337		eig_sym(eigval, eigvec, A); ///< A = eigvec * diag(eigval) * eigvec^T
338
339		const double thresh = tol * std::abs(eigval.max());
340		arma::vec inv_eigval(eigval.n_elem, arma::fill::zeros);
341		for (arma::uword i = 0; i < eigval.n_elem; ++i)
342		if (std::abs(eigval(i)) > thresh)
343		inv_eigval(i) = 1.0 / eigval(i);
344
345		// x = eigvec * diag(inv_eigval) * eigvec^T * b
346		return eigvec * arma::diagmat(inv_eigval) * eigvec.t() * b;
347		#endif
348		}
349
350		/// @}
351
352		} // namespace linalg
353		} // namespace qe
354
355		#endif // LIBQE_LINALG_FALLBACK_HPP

1		/**
2		* @file normalization.hpp
3		* @brief Row-wise normalization utilities for ENA connection networks.
4		*/
5		#ifndef LIBQE_NORMALIZATION_HPP
6		#define LIBQE_NORMALIZATION_HPP
7
8		#include <armadillo>
9		#include <cmath>
10
11		namespace qe {
12
13		/**
14		* @brief Normalize each row to unit L2 norm (project onto the unit hypersphere).
15		*
16		* Rows whose L2 norm is zero are left unchanged.
17		*
18		* @param m Input matrix (n_units × n_connections), row-major.
19		* @returns Matrix of the same shape with each row divided by its L2 norm.
20		* @note Equivalent to @c fun_sphere_norm() in rENA/ena.cpp.
21		*/
22	4x	inline arma::mat normalize_networks(arma::mat m) {
23	4x	arma::mat out(m.n_rows, m.n_cols, arma::fill::zeros);
24	14x	for (arma::uword r = 0; r < m.n_rows; r++) {
25	10x	double len = arma::norm(m.row(r), 2);
26	18x	if (std::isfinite(len) && len > 0.0) out.row(r) = m.row(r) / len;
27		}
28	4x	return out;
29		}
30
31		/**
32		* @brief Scale every entry by the reciprocal of the largest row L2 norm.
33		*
34		* Preserves relative magnitudes across rows; does not normalise each row
35		* individually. If the largest row norm is zero, the matrix is returned
36		* unchanged.
37		*
38		* @param m Input matrix (n_units × n_connections), modified in place.
39		* @returns Scaled matrix (same shape as @p m).
40		* @note Equivalent to @c fun_skip_sphere_norm() in rENA/ena.cpp.
41		*/
42	3x	inline arma::mat scale_networks(arma::mat m) {
43	3x	double largest = 0.0;
44	10x	for (arma::uword r = 0; r < m.n_rows; r++) {
45	7x	double len = arma::norm(m.row(r), 2);
46	7x	if (std::isfinite(len)) largest = std::max(largest, len);
47		}
48	3x	if (largest > 0.0) m /= largest;
49	6x	return m;
50		}
51
52		} // namespace qe
53
54		#endif // LIBQE_NORMALIZATION_HPP

1		/**
2		* @file rotation.hpp
3		* @brief Rotation routines for Epistemic Network Analysis (ENA):
4		* SVD, deflation, orthogonal-SVD, complete (generalized), and
5		* means rotation. All routines are designed to match the
6		* numerical output of the reference R implementation in rENA.
7		*/
8
9		#ifndef LIBQE_ROTATION_HPP
10		#define LIBQE_ROTATION_HPP
11
12		#include "linalg_fallback.hpp"
13		#include <armadillo>
14		#include <stdexcept>
15		#include <string>
16		#include <utility>
17		#include <vector>
18
19		namespace qe {
20
21		/// @name Return types
22		/// @{
23
24		/**
25		* @brief Aggregated result returned by every rotation routine.
26		*
27		* The layout mirrors the output of R's `prcomp()`: `rotation` holds the
28		* right-singular vectors (principal axes), `eigenvalues` holds the
29		* explained-variance values (sdev^2 in R parlance), and `column_names`
30		* carries the human-readable axis labels used downstream for network plots
31		* and loadings tables.
32		*/
33		struct RotationResult {
34		arma::mat rotation; ///< p x p orthogonal matrix; column j is rotation axis j.
35		arma::vec eigenvalues; ///< Length-p vector of sdev^2 values, matching rENA's convention.
36		std::vector<std::string> column_names; ///< Axis labels, e.g. "MR1", "SVD2", "GMR1".
37		};
38
39		/**
40		* @brief A pair of 0-based row-index vectors identifying two groups within a
41		* points matrix.
42		*
43		* Used by means_rotation() to specify which rows belong to group A and which
44		* belong to group B for each successive mean-difference axis.
45		*/
46		struct GroupPair {
47		arma::uvec a; ///< Row indices of group A (0-based).
48		arma::uvec b; ///< Row indices of group B (0-based).
49		};
50
51		/// @}
52
53		/// @name SVD rotation
54		/// @{
55
56		/**
57		* @brief Compute a full SVD rotation of a (pre-centered) points matrix.
58		*
59		* Produces the right-singular vectors V as the rotation matrix and scales
60		* each singular value to an eigenvalue (sdev^2) compatible with rENA's
61		* `prcomp()` output.
62		*
63		* @param points n x p data matrix. The caller is responsible for
64		* centering upstream; this function applies no centering.
65		*
66		* @returns A RotationResult where:
67		* - `rotation` = V (p x p full SVD; trailing columns span the null
68		* space when the matrix is rank-deficient).
69		* - `eigenvalues[j]` = s[j]^2 / max(1, n - 1) (== sdev^2 in R).
70		* - `column_names` = {"SVD1", "SVD2", ..., "SVDp"}.
71		*
72		* @note Equivalent to `prcomp(points, retx=FALSE, scale=FALSE,
73		* center=FALSE, tol=0)` in rENA/R.
74		*
75		* @note Sign convention: none. Signs follow the underlying LAPACK SVD,
76		* matching rENA's long-standing behavior. A deterministic sign rule
77		* (e.g. svd_flip) may be added later as an opt-in flag.
78		*/
79	40x	inline RotationResult ena_svd(const arma::mat& points) {
80	40x	if (!points.is_finite())
81	!	throw std::invalid_argument("ena_svd: input matrix must not contain NaN or Inf");
82	40x	arma::mat U, V;
83	40x	arma::vec s;
84	40x	qe::linalg::svd(U, s, V, points);
85
86	40x	const arma::uword p = points.n_cols;
87	80x	const double denom = points.n_rows > 1
88		? static_cast<double>(points.n_rows - 1)
89		: 1.0;
90
91	40x	arma::vec eigenvalues(p, arma::fill::zeros);
92	40x	const arma::uword k = std::min<arma::uword>(s.n_elem, p);
93	198x	for (arma::uword j = 0; j < k; ++j) {
94	474x	eigenvalues(j) = (s(j) * s(j)) / denom;
95		}
96
97	40x	std::vector<std::string> labels(p);
98	198x	for (arma::uword j = 0; j < p; ++j) {
99	158x	labels[j] = "SVD" + std::to_string(j + 1);
100		}
101	40x	return {V, eigenvalues, std::move(labels)};
102		}
103
104		/// @}
105
106		/// @name Deflation
107		/// @{
108
109		/**
110		* @brief Project a data matrix onto the hyperplane orthogonal to a given axis.
111		*
112		* Computes `data - (data * axis) * axis^T`, effectively removing the
113		* component of every row of `data` that lies along `axis`.
114		*
115		* @param data n x p data matrix.
116		* @param axis Unit-norm column vector of length p. The caller is
117		* responsible for normalizing `axis` before calling this
118		* function; no normalization is performed internally.
119		*
120		* @returns n x p matrix with the projection onto `axis` subtracted out.
121		*/
122	16x	inline arma::mat deflate(const arma::mat& data, const arma::vec& axis) {
123	32x	return data - (data * axis) * axis.t();
124		}
125
126		/// @}
127
128		/// @name Orthogonal SVD rotation
129		/// @{
130
131		/**
132		* @brief Orthonormalize named axes via QR, then complete the rotation with
133		* an SVD of the data projected onto the orthogonal complement.
134		*
135		* The named axes in `weights` are orthonormalized via a full QR
136		* decomposition. The first k columns of the resulting Q become the
137		* "fixed" part of the rotation; the remaining p - k columns define a
138		* complementary subspace onto which `data` is projected, and a further SVD
139		* fills those trailing rotation columns.
140		*
141		* @param data n x p data matrix (caller-centered if required).
142		* @param weights p x k matrix of named axis directions. Column norms
143		* need not be 1; QR handles normalization internally.
144		* @param named_labels Length-k vector of labels for the first k rotation
145		* axes. Trailing axes receive labels "SVD{k+1}" ..
146		* "SVDp".
147		*
148		* @returns A RotationResult containing the combined p x p rotation,
149		* eigenvalues for the SVD-filled trailing axes (indices k..p-1;
150		* the first k eigenvalues are zero), and axis labels.
151		*
152		* @note Equivalent to `orthogonal_svd()` in rENA/R/ena.rotate.by.mean.R:
153		* @code
154		* Q = qr.Q(qr(weights), complete = TRUE) # p x p
155		* X_bar = data %*% Q[, (k+1):p] # n x (p - k)
156		* V = prcomp(X_bar)$rotation
157		* out = cbind(Q[, 1:k], Q[, (k+1):p] %*% V)
158		* @endcode
159		*
160		* @note The named axes in the OUTPUT are the orthonormalized Q columns, NOT
161		* the original `weights` columns. Use complete_rotation() if you need
162		* to preserve input axes verbatim.
163		*
164		* @throws std::runtime_error if `data.n_cols != weights.n_rows` or
165		* `named_labels.size() != weights.n_cols`.
166		*/
167	10x	inline RotationResult orthogonal_svd(
168		const arma::mat& data,
169		const arma::mat& weights,
170		const std::vector<std::string>& named_labels) {
171	10x	const arma::uword p = weights.n_rows;
172	10x	const arma::uword k = weights.n_cols;
173	10x	if (data.n_cols != p) {
174		throw std::runtime_error(
175	1x	"orthogonal_svd: data.n_cols must equal weights.n_rows");
176		}
177	9x	if (named_labels.size() != k) {
178		throw std::runtime_error(
179	1x	"orthogonal_svd: named_labels.size() must equal weights.n_cols");
180		}
181	8x	if (k == 0) {
182	!	return ena_svd(data);
183		}
184
185	8x	arma::mat Q, R;
186	8x	qe::linalg::qr_full(Q, R, weights); // Q is p x p
187
188	8x	arma::mat rotation(p, p);
189	8x	arma::vec eigenvalues(p, arma::fill::zeros);
190	24x	rotation.cols(0, k - 1) = Q.cols(0, k - 1);
191
192	8x	if (k < p) {
193	16x	arma::mat Q_comp = Q.cols(k, p - 1); // p x (p - k)
194	8x	arma::mat X_bar = data * Q_comp; // n x (p - k)
195	8x	RotationResult inner = ena_svd(X_bar);
196	16x	rotation.cols(k, p - 1) = Q_comp * inner.rotation;
197	32x	for (arma::uword j = 0; j < (p - k); ++j) {
198	48x	eigenvalues(k + j) = inner.eigenvalues(j);
199		}
200		}
201
202	8x	std::vector<std::string> labels(p);
203	22x	for (arma::uword j = 0; j < k; ++j) labels[j] = named_labels[j];
204	32x	for (arma::uword j = k; j < p; ++j) {
205	24x	labels[j] = "SVD" + std::to_string(j + 1);
206		}
207	16x	return {rotation, eigenvalues, std::move(labels)};
208		}
209
210		/// @}
211
212		/// @name Complete (generalized) rotation
213		/// @{
214
215		/**
216		* @brief Keep named axes verbatim and fill remaining axes from an SVD of the
217		* data after parallel deflation by all named axes.
218		*
219		* Unlike orthogonal_svd(), this routine preserves the caller-supplied
220		* `named_axes` exactly (no QR re-orthonormalization). The complementary
221		* axes are obtained by deflating `data` simultaneously by all named axes and
222		* then running ena_svd() on the result.
223		*
224		* The deflation is parallel: every projection is subtracted from the
225		* original `data`, not from a progressively deflated copy:
226		* @code
227		* defA = data - data * (named_axes * named_axes^T)
228		* @endcode
229		* This matches `defA <- A - A %% v1 %% t(v1) - A %% v2 %% t(v2)` in
230		* rENA line-for-line. For mutually orthogonal axes the result is identical
231		* to sequential deflation; for non-orthogonal axes it differs.
232		*
233		* @param data n x p data matrix.
234		* @param named_axes p x k matrix of named rotation axes. Each column
235		* must be unit-norm; orthonormality between columns is
236		* NOT required.
237		* @param named_labels Length-k vector of labels for the first k axes.
238		* Trailing axes receive labels "SVD{k+1}" .. "SVDp".
239		* Conventional labels for generalized rotation are
240		* "GMR1", "GMR2", then "SVD{k+1}" onward, but labels
241		* are entirely caller-supplied.
242		*
243		* @returns A RotationResult where columns 0..k-1 of `rotation` equal
244		* `named_axes` verbatim and columns k..p-1 come from the SVD of
245		* the deflated data.
246		*
247		* @note Equivalent to the tail of `ena.rotate.by.generalized` in rENA
248		* (canonical version: commit 2c079126 on rENA `origin/main`).
249		*
250		* @throws std::runtime_error if `data.n_cols != named_axes.n_rows` or
251		* `named_labels.size() != named_axes.n_cols`.
252		*/
253	16x	inline RotationResult complete_rotation(
254		const arma::mat& data,
255		const arma::mat& named_axes,
256		const std::vector<std::string>& named_labels) {
257	16x	const arma::uword p = named_axes.n_rows;
258	16x	const arma::uword k = named_axes.n_cols;
259	16x	if (data.n_cols != p) {
260		throw std::runtime_error(
261	!	"complete_rotation: data.n_cols must equal named_axes.n_rows");
262		}
263	16x	if (named_labels.size() != k) {
264		throw std::runtime_error(
265	!	"complete_rotation: named_labels.size() must equal named_axes.n_cols");
266		}
267	16x	if (k == 0) {
268	!	return ena_svd(data);
269		}
270
271	32x	arma::mat defA = data - data * named_axes * named_axes.t();
272	16x	RotationResult inner = ena_svd(defA);
273
274	16x	arma::mat rotation(p, p);
275	16x	arma::vec eigenvalues(p, arma::fill::zeros);
276	32x	rotation.cols(0, k - 1) = named_axes;
277	16x	if (k < p) {
278	48x	rotation.cols(k, p - 1) = inner.rotation.cols(0, p - k - 1);
279	53x	for (arma::uword j = 0; j < (p - k); ++j) {
280	74x	eigenvalues(k + j) = inner.eigenvalues(j);
281		}
282		}
283
284	16x	std::vector<std::string> labels(p);
285	47x	for (arma::uword j = 0; j < k; ++j) labels[j] = named_labels[j];
286	53x	for (arma::uword j = k; j < p; ++j) {
287	37x	labels[j] = "SVD" + std::to_string(j + 1);
288		}
289	32x	return {rotation, eigenvalues, std::move(labels)};
290		}
291
292		/// @}
293
294		/// @name Means rotation
295		/// @{
296
297		/**
298		* @brief Compute a means rotation from one or more group pairs.
299		*
300		* For each GroupPair, computes the normalized mean-difference vector on the
301		* progressively deflated data and stacks the resulting unit-norm axes into a
302		* weights matrix. The final rotation is completed by orthogonal_svd().
303		* The input `points` is mean-centered before any axes are computed, matching
304		* rENA's `scale(data, scale=FALSE, center=TRUE)`.
305		*
306		* @param points n x p matrix of network positions (raw, not pre-centered).
307		* @param pairs Ordered list of GroupPair objects. Each pair contributes
308		* one "MR" axis in sequence. Must be non-empty.
309		*
310		* @returns A RotationResult with `column_names` = {"MR1", ..., "MRm",
311		* "SVD{m+1}", ..., "SVDp"} where m = pairs.size().
312		*
313		* @note Equivalent to `ena.rotate.by.mean()` in rENA/R/ena.rotate.by.mean.R.
314		* The progressive deflation (each axis is computed on the data after
315		* removing all previous axes) is the key numerical step.
316		*
317		* @warning There is NO guard against a zero-norm mean-difference vector.
318		* If the two group means are identical on the current deflated data,
319		* the axis will contain NaN values (division by zero). This mirrors
320		* a latent bug in rENA and is preserved verbatim for numerical
321		* fidelity; a safe opt-in guard will be added in a future release.
322		*
323		* @throws std::runtime_error if `pairs` is empty.
324		*/
325	8x	inline RotationResult means_rotation(const arma::mat& points,
326		const std::vector<GroupPair>& pairs) {
327	8x	if (pairs.empty()) {
328	1x	throw std::runtime_error("Unable to rotate without 2 groups.");
329		}
330
331	14x	arma::mat data = points.each_row() - arma::mean(points, 0);
332	7x	arma::mat deflated = data;
333	7x	arma::mat weights(data.n_cols, pairs.size(), arma::fill::zeros);
334	7x	std::vector<std::string> labels;
335	7x	labels.reserve(pairs.size());
336
337	19x	for (std::size_t i = 0; i < pairs.size(); ++i) {
338	24x	const arma::rowvec mean_a = arma::mean(deflated.rows(pairs[i].a), 0);
339	24x	const arma::rowvec mean_b = arma::mean(deflated.rows(pairs[i].b), 0);
340	24x	const arma::vec diff = (mean_a - mean_b).t();
341	12x	const double norm = std::sqrt(arma::accu(diff % diff));
342	12x	const arma::vec axis = diff / norm; // no zero-norm guard
343
344	12x	deflated = deflate(deflated, axis);
345	24x	weights.col(i) = axis;
346	12x	labels.push_back("MR" + std::to_string(i + 1));
347		}
348
349	14x	return orthogonal_svd(deflated, weights, labels);
350		}
351
352		/// @}
353
354		} // namespace qe
355
356		#endif // LIBQE_ROTATION_HPP