diff --git a/DESCRIPTION b/DESCRIPTION
index e2fd51b..f9870ef 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,13 +1,13 @@
 Package: scoper
 Type: Package
 Version: 1.4.0.999
-Date: 2026-02-13
+Date: 2026-03-17
 Authors@R: c(person("Nima", "Nouri", role=c("aut"), 
                     email="nima.nouri@yale.edu"),
              person("Edel", "Aron", role=c("ctb"),
                     email="edel.aron@yale.edu"),
              person("Gisela", "Gabernet", role=c("ctb"),
-                    email="gisela.gabernet@yale.edu"),
+                    email="gisela.gabernet@yale.edu"),                 
              person("Cole", "Jensen", role=c("ctb"),
                     email="cole.jensen@yale.edu"),
              person("Huimin", "Lyu", role=c("ctb"),
diff --git a/R/Functions.R b/R/Functions.R
index 99e21de..12b934a 100644
--- a/R/Functions.R
+++ b/R/Functions.R
@@ -887,10 +887,20 @@ identicalClones <- function(db, method=c("nt", "aa"), junction="junction",
 #' @param    db                 data.frame containing sequence data.
 #' @param    threshold          numeric scalar where the tree should be cut (the distance threshold for clonal grouping).
 #' @param    method             one of the \code{"nt"} for nucleotide based clustering or 
-#'                              \code{"aa"} for amino acid based clustering.
+#'                              \code{"aa"} for amino acid based clustering. Method `"aa"` still expects nucleotide sequences, 
+#'                              which will be translated to amino acids
 #' @param    linkage            available linkage are \code{"single"}, \code{"average"}, and \code{"complete"}.
 #' @param    normalize	        method of normalization. The default is \code{"len"}, which divides the distance by the length 
 #'                              of the sequence group. If \code{"none"} then no normalization if performed.
+#' @param    IUPAC              If \code{TRUE}, allows sequences with IUPAC codes to pass validation 
+#'                              and be used in clustering with IUPAC-aware distance calculation 
+#'                              (via \code{alakazam::pairwiseDist}). If \code{FALSE} (default), uses fast Hamming distance 
+#'                              (via \code{fastDist_rcpp}) and only allows standard bases (A, T, C, G), N, and ? 
+#'                              in sequences.This parameter controls validation and distance
+#'                              calculation method, not sequence filtering. See \code{max_n} for 
+#'                              filtering sequences by character content. See the IUPAC and max_n 
+#'                              parameters section for more details and examples. Note: This parameter is only available 
+#'                              for \code{hierarchicalClones} with \code{method="nt"}. 
 #' @param    junction           character name of the column containing junction sequences.
 #'                              Also used to determine sequence length for grouping.
 #' @param    v_call             name of the column containing the V-segment allele calls.
@@ -923,11 +933,14 @@ identicalClones <- function(db, method=c("nt", "aa"), junction="junction",
 #'                              less than 7 nucleotides.
 #' @param    mod3               if \code{TRUE} removes records with a \code{junction} length that is not divisible by 
 #'                              3 in nucleotide space.
-#' @param    max_n              The maximum number of degenerate characters to permit in the junction sequence 
-#'                              before excluding the record from clonal assignment. Note, with 
-#'                              \code{linkage="single"} non-informative positions can create artifactual 
-#'                              links between unrelated sequences. Use with caution. 
-#'                              Default is set to be zero. Set it as \code{"NULL"} for no action.
+#' @param    max_n              The maximum number of non-ATCG characters (degenerate positions) to permit 
+#'                              in the junction sequence before excluding the record from clonal assignment. 
+#'                              Note: \code{max_n} operates independently 
+#'                              from \code{IUPAC} - it controls filtering by character count, while 
+#'                              \code{IUPAC} controls validation and distance calculation method. 
+#'                              With \code{linkage="single"}, non-informative positions can create 
+#'                              artifactual links between unrelated sequences. Use with caution. 
+#'                              Default is 0 (ATCG-only). Set to \code{NULL} for no filtering.
 #' @param    nproc              number of cores to distribute the function over.
 #' @param    verbose            if \code{TRUE} prints out a summary of each step cloning process.
 #'                              if \code{FALSE} (default) process cloning silently.
@@ -936,7 +949,8 @@ identicalClones <- function(db, method=c("nt", "aa"), junction="junction",
 #'                              The default is \code{NULL} for no action.
 #' @param    summarize_clones   if \code{TRUE} performs a series of analysis to assess the clonal landscape
 #'                              and returns a \link{ScoperClones} object. If \code{FALSE} (default) then
-#'                              a modified input \code{db} is returned. When grouping by \code{fields}, 
+#'                              a modified input \code{db} is returned with clone identifiers in the specified 
+#'                              `clone` column. When grouping by \code{fields}, 
 #'                              \code{summarize_clones} should be \code{FALSE}. 
 #' @param   seq_id              The column containing sequence ids
 #'
@@ -947,6 +961,55 @@ identicalClones <- function(db, method=c("nt", "aa"), junction="junction",
 #' clonal assignment summary information and a modified input \code{db} in the \code{db} slot that 
 #' contains clonal identifiers in the specified \code{clone} column.
 #' 
+#' @section IUPAC and max_n parameters:
+#' Note: The \code{IUPAC} parameter is only available for \code{hierarchicalClones} with 
+#' \code{method="nt"} (nucleotide mode). It is ignored when \code{method="aa"} (amino acid mode). 
+#' The \code{identicalClones} and \code{spectralClones} functions always operate as if 
+#' \code{IUPAC=FALSE} (standard bases plus N and ? only). The \code{max_n} parameter 
+#' is available for all cloning functions.
+#' 
+#' The \code{IUPAC} and \code{max_n} parameters serve complementary but distinct purposes:
+#' 
+#' \code{IUPAC} controls:
+#' \itemize{
+#'   \item Sequence validation (which characters are allowed)
+#'   \item Distance calculation method (fast Hamming vs IUPAC-aware scoring)
+#' }
+#' 
+#' \code{max_n} controls:
+#' \itemize{
+#'   \item Sequence filtering by counting non-ATCG characters
+#' }
+#' 
+#' Common use cases (for \code{method="nt"}):
+#' \itemize{
+#'   \item \code{IUPAC=FALSE, max_n=0}: Strict ATCG-only mode with fast distance calculation. 
+#'         Will throw an error and exit if sequences with IUPAC codes not A, T, C, G, N, or ? are detected
+#'         among the first 1000 sequences. Fastest option for high-quality data.
+#'   \item \code{IUPAC=FALSE, max_n>0}: Will throw an error and exit if sequences with IUPAC 
+#'         codes not A, T, C, G, N, or ? are detected among the first 1000 sequences. Allows sequences 
+#'         with limited N/? characters in distance calculation, using fast Hamming distance. 
+#'         Note: IUPAC codes are rejected during validation (before max_n filtering), so max_n only 
+#'         affects sequences that passed validation with N or ? characters. Useful for data with low-quality 
+#'         or masked positions but no experimental ambiguity codes.
+#'   \item \code{IUPAC=TRUE, max_n=0}: Uses IUPAC-aware distance but filters out all non-ATCG 
+#'         characters anyway. Only standard bases remain after filtering.
+#'   \item \code{IUPAC=TRUE, max_n>0}: Allows sequences with limited ambiguity codes and uses 
+#'         proper IUPAC-aware distance calculation. Slower but handles biological ambiguity correctly. 
+#'         Set max_n to the maximum number of ambiguous positions per sequence you want to tolerate 
+#'         (counts all non-ATCG: N, ?, and other IUPAC codes).
+#'   \item \code{IUPAC=TRUE, max_n=NULL}: Process all sequences with IUPAC codes regardless of the 
+#'         number of ambiguous positions. Uses IUPAC-aware distance calculation with no filtering. 
+#'         Most permissive option for data with extensive IUPAC ambiguity codes.
+#' }
+#' 
+#' Note: Validation occurs before filtering. When \code{IUPAC=FALSE}, sequences containing IUPAC 
+#' ambiguity codes (R, Y, W, S, M, K, etc.) will fail validation and be rejected before the 
+#' \code{max_n} filtering step. Therefore, with \code{IUPAC=FALSE, max_n > 0}, only sequences 
+#' with N and ? characters (not IUPAC codes) can pass validation and be filtered by \code{max_n}. 
+#' The \code{max_n} parameter always counts using regex \code{"[^ATCG]"}, but \code{IUPAC} determines 
+#' which non-ATCG characters are allowed to reach the filtering step.
+#' 
 #' @section Single-cell data:
 #' To invoke single-cell mode the \code{cell_id} argument must be specified and the \code{locus} 
 #' column must be correct. Otherwise, clustering will be performed with bulk sequencing assumptions, 
@@ -984,15 +1047,15 @@ identicalClones <- function(db, method=c("nt", "aa"), junction="junction",
 #' 
 #' @export
 hierarchicalClones <- function(db, threshold, method=c("nt", "aa"), linkage=c("single", "average", "complete"), 
-                               normalize=c("len", "none"), junction="junction", 
+                               normalize=c("len", "none"), IUPAC=FALSE, junction="junction", 
                                v_call="v_call", j_call="j_call", clone="clone_id", fields=NULL,
                                cell_id=NULL, locus="locus", only_heavy=TRUE, split_light=FALSE,
                                first=FALSE, cdr3=FALSE, mod3=FALSE, max_n=0, nproc=1,
                                verbose=FALSE, log=NULL, summarize_clones=FALSE, seq_id = "sequence_id") {
     
     results <- defineClonesScoper(db = db, threshold = threshold, model = "hierarchical", 
-                                  method = match.arg(method), linkage = match.arg(linkage), normalize = match.arg(normalize),
-                                  junction = junction, v_call = v_call, j_call = j_call, clone = clone, fields = fields,
+                                  method = match.arg(method), linkage = match.arg(linkage), normalize = match.arg(normalize), 
+                                  IUPAC = IUPAC, junction = junction, v_call = v_call, j_call = j_call, clone = clone, fields = fields,
                                   cell_id = cell_id, locus = locus, only_heavy = only_heavy, split_light = split_light,
                                   first = first, cdr3 = cdr3, mod3 = mod3, max_n = max_n, nproc = nproc,   
                                   verbose = verbose, log = log, summarize_clones = summarize_clones)
@@ -1062,9 +1125,12 @@ hierarchicalClones <- function(db, threshold, method=c("nt", "aa"), linkage=c("s
 #'                              less than 7 nucleotides.
 #' @param    mod3               if \code{TRUE} removes records with a \code{junction} length that is not divisible by 
 #'                              3 in nucleotide space.
-#' @param    max_n              the maximum number of degenerate characters to permit in the junction sequence before excluding the 
-#'                              record from clonal assignment. Default is set to be zero. Set it as \code{"NULL"} 
-#'                              for no action.
+#' @param    max_n              The maximum number of degenerate characters to permit in the junction sequence before excluding the 
+#'                              record from clonal assignment. Counts non-ATCG characters using regex \code{"[^ATCG]"}, which 
+#'                              includes N, ?, and IUPAC ambiguity codes. Note: \code{spectralClones} does not support the 
+#'                              \code{IUPAC} parameter and always validates sequences as if \code{IUPAC=FALSE} (only allows 
+#'                              standard bases A,T,C,G plus N and ?). IUPAC ambiguity codes will cause validation errors. 
+#'                              Default is 0. Set to \code{NULL} for no filtering.
 #' @param    threshold          the supervising cut-off to enforce an upper-limit distance for clonal grouping.
 #'                              A numeric value between (0,1).
 #' @param    base_sim           required similarity cut-off for sequences in equal distances from each other.
@@ -1184,7 +1250,7 @@ defineClonesScoper <- function(db,
                                model = c("identical", "hierarchical", "spectral"),
                                method = c("nt", "aa", "novj", "vj"),
                                linkage = c("single", "average", "complete"), normalize = c("len", "none"),
-                               germline = "germline_alignment", sequence = "sequence_alignment",
+                               germline = "germline_alignment", sequence = "sequence_alignment", IUPAC = FALSE,
                                junction = "junction", v_call = "v_call", j_call = "j_call", clone = "clone_id",
                                fields = NULL, cell_id = NULL, locus = NULL, only_heavy = TRUE, split_light = FALSE,
                                targeting_model = NULL, len_limit = NULL,
@@ -1193,7 +1259,8 @@ defineClonesScoper <- function(db,
                                iter_max = 1000, nstart = 1000, nproc = 1,
                                verbose = FALSE, log = NULL,
                                summarize_clones = FALSE) {
-  
+
+    cat("In modified Functions.R")			       
     ### get model
     model <- match.arg(model)
     
@@ -1249,13 +1316,26 @@ defineClonesScoper <- function(db,
     }
     
     ### Check for invalid characters
-    valid_chars <- colnames(alakazam::getDNAMatrix(gap = 0))
+    # IUPAC mode is only supported for hierarchical clustering
+    if (!IUPAC && model == "hierarchical" && method == "nt") {
+        valid_chars <- c("A", "T", "C", "G", "N", "?")
+    } else {
+        valid_chars <- colnames(alakazam::getDNAMatrix(gap = 0))
+    }
     .validateSeq <- function(x) { all(unique(strsplit(x, "")[[1]]) %in% valid_chars) }
-    valid_seq <- sapply(db[[junction]], .validateSeq)
+    valid_seq <- sapply(head(db[[junction]],1000), .validateSeq)
     not_valid_seq <- which(!valid_seq)
     if (length(not_valid_seq) > 0) {
-        stop("invalid sequence characters in the ", junction, " column. ",
-             length(not_valid_seq)," sequence(s) found.", "\n Valid characters are: '",  valid_chars, "'")
+        error_msg <- paste0("Invalid sequence characters in the ", junction, " column were found when checking the first 1000 sequences. ",
+            length(not_valid_seq)," sequence(s) found.", "\n Valid characters are: '",  valid_chars, "'",
+            "\n If you have other IUPAC characters in your sequences, set IUPAC=TRUE to allow all IUPAC bases, this will run a slower version of hierarchicalClustering.")
+        if (method == "aa") {
+            method_aa_msg <- paste0("Clustering with method='aa' expects nucleotide sequences, which will be translated by this function.")
+            stop(paste0(error_msg, "\n", method_aa_msg))
+        } else {
+            stop(error_msg)
+        }
+        
     }
     
     ### temp columns
@@ -1417,7 +1497,14 @@ defineClonesScoper <- function(db,
     } else {
         stop('Nproc must be positive.')
     }
-    
+
+    ## RDB
+    ## This is necessary until the modified code is in the default path
+    #parallel::clusterEvalQ(cluster, {
+    #    .libPaths(c("/gpfs/gibbs/project/support/rdb9/working/Gabernet/251202/modified/devlib", .libPaths()))
+    #    library(scoper)
+    #})
+
     ### export function to clusters
     if (nproc > 1) { 
         export_functions <- list("passToClustering_lev1", "passToClustering_lev2", "passToClustering_lev3", "passToClustering_lev4",
@@ -1433,6 +1520,7 @@ defineClonesScoper <- function(db,
     db_cloned <- foreach::foreach(gp=1:n_groups,
                          .final=dplyr::bind_rows,
                          .inorder=TRUE,
+			 .packages="scoper",
                          .errorhandling='stop') %dopar% { 
                              # *********************************************************************************
                              # filter each group
@@ -1450,6 +1538,7 @@ defineClonesScoper <- function(db,
                                                               method = method,
                                                               linkage = ifelse(model == "hierarchical", linkage, NA),
                                                               normalize = ifelse(model == "hierarchical", normalize, NA),
+                                                              IUPAC = ifelse(model == "hierarchical" & method == "nt", IUPAC, FALSE),
                                                               germline = germline,
                                                               sequence = sequence,
                                                               junction = junction,
@@ -1749,6 +1838,7 @@ passToClustering_lev1 <- function (db_gp,
                                    method = c("nt", "aa", "novj", "vj"),
                                    linkage = c("single", "average", "complete"),
                                    normalize = c("len", "none"),
+                                   IUPAC = FALSE,
                                    germline = "germline_alignment",
                                    sequence = "sequence_alignment",
                                    junction = "junction",
@@ -1775,6 +1865,7 @@ passToClustering_lev1 <- function (db_gp,
                                                    method = method,
                                                    linkage = linkage,
                                                    normalize = normalize,
+                                                   IUPAC = IUPAC, 
                                                    junction = junction,
                                                    cdr3 = cdr3,
                                                    cdr3_col = cdr3_col,
@@ -1839,6 +1930,7 @@ hierarchicalClones_helper <- function(db_gp,
                                       method = c("nt", "aa"),
                                       linkage = c("single", "average", "complete"),
                                       normalize = c("len", "none"),
+                                      IUPAC = FALSE,
                                       junction = "junction",
                                       cdr3 = FALSE,
                                       cdr3_col = NA,
@@ -1854,7 +1946,7 @@ hierarchicalClones_helper <- function(db_gp,
     
     ### number of sequences
     n <- nrow(db_gp)
-    
+
     # get sequences
     if (method == "nt") {
         seqs <- db_gp[[ifelse(cdr3, cdr3_col, junction)]]   
@@ -1868,16 +1960,22 @@ hierarchicalClones_helper <- function(db_gp,
     n_unq <- nrow(df)
     ind_unq <- df$V1
     seqs_unq <- df$seqs
+
     if (n_unq == 1) {
         return(list("idCluster" = rep(1, n), 
                     "n_cluster" = 1, 
                     "eigen_vals" = rep(0, n)))
     }
-    
+
     # calculate distance matrix
     if (method == "nt") {
-        dist_mtx <- alakazam::pairwiseDist(seq = seqs_unq, 
+    	if (! IUPAC){
+            dist_mtx <- fastDist_rcpp(seqs_unq)
+        }
+       	else{
+            dist_mtx <- alakazam::pairwiseDist(seq = seqs_unq, 
                                  dist_mat = getDNAMatrix(gap = 0))
+        }
     } else if (method == "aa") {
         dist_mtx <- alakazam::pairwiseDist(seq = seqs_unq, 
                                  dist_mat = getAAMatrix(gap = 0))
diff --git a/R/RcppExports.R b/R/RcppExports.R
index f02781e..c4726d7 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -5,3 +5,11 @@ pairwiseMutMatrixRcpp <- function(informative_pos, mutMtx, motifMtx) {
     .Call(`_scoper_pairwiseMutMatrixRcpp`, informative_pos, mutMtx, motifMtx)
 }
 
+countSeqsWithInvalidBases_rcpp <- function(seqs) {
+    .Call(`_scoper_countSeqsWithInvalidBases_rcpp`, seqs)
+}
+
+fastDist_rcpp <- function(seqs) {
+    .Call(`_scoper_fastDist_rcpp`, seqs)
+}
+
diff --git a/docs/topics/hierarchicalClones.md b/docs/topics/hierarchicalClones.md
index 9fbf0d9..69241dc 100644
--- a/docs/topics/hierarchicalClones.md
+++ b/docs/topics/hierarchicalClones.md
@@ -51,7 +51,8 @@ threshold
 
 method
 :   one of the `"nt"` for nucleotide based clustering or 
-`"aa"` for amino acid based clustering.
+`"aa"` for amino acid based clustering. Method `"aa"` still expects nucleotide sequences, 
+which will be translated to amino acids.
 
 linkage
 :   available linkage are `"single"`, `"average"`, and `"complete"`.
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 04defa6..c447597 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -23,9 +23,33 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// countSeqsWithInvalidBases_rcpp
+int countSeqsWithInvalidBases_rcpp(CharacterVector seqs);
+RcppExport SEXP _scoper_countSeqsWithInvalidBases_rcpp(SEXP seqsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< CharacterVector >::type seqs(seqsSEXP);
+    rcpp_result_gen = Rcpp::wrap(countSeqsWithInvalidBases_rcpp(seqs));
+    return rcpp_result_gen;
+END_RCPP
+}
+// fastDist_rcpp
+IntegerMatrix fastDist_rcpp(CharacterVector seqs);
+RcppExport SEXP _scoper_fastDist_rcpp(SEXP seqsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< CharacterVector >::type seqs(seqsSEXP);
+    rcpp_result_gen = Rcpp::wrap(fastDist_rcpp(seqs));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_scoper_pairwiseMutMatrixRcpp", (DL_FUNC) &_scoper_pairwiseMutMatrixRcpp, 3},
+    {"_scoper_countSeqsWithInvalidBases_rcpp", (DL_FUNC) &_scoper_countSeqsWithInvalidBases_rcpp, 1},
+    {"_scoper_fastDist_rcpp", (DL_FUNC) &_scoper_fastDist_rcpp, 1},
     {NULL, NULL, 0}
 };
 
diff --git a/src/fastDist.cpp b/src/fastDist.cpp
new file mode 100644
index 0000000..0b89d01
--- /dev/null
+++ b/src/fastDist.cpp
@@ -0,0 +1,129 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+inline uint8_t code_char(char c){
+  switch(c){
+    case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3;
+    case 'N': return 4; case '?': return 5;
+    default:  return 255;
+  }
+}
+
+// [[Rcpp::export]]
+int countSeqsWithInvalidBases_rcpp(CharacterVector seqs) {
+  int N = seqs.size();
+  int bad = 0;
+
+  for (int i = 0; i < N; ++i) {
+    if (seqs[i] == NA_STRING) {
+      bad++;
+      continue;
+    }
+
+    std::string s = as<std::string>(seqs[i]);
+    bool invalid = false;
+    for (char ch : s) {
+      char up = (char)std::toupper((unsigned char)ch);
+      if (code_char(up) == 255) {
+        invalid = true;
+        break;
+      }
+    }
+
+    if (invalid) bad++;
+  }
+
+  return bad;
+}
+
+// [[Rcpp::export]]
+IntegerMatrix fastDist_rcpp(CharacterVector seqs) {
+  int N = seqs.size();
+
+  if (N == 0) stop("empty input");
+
+  std::string s0 = as<std::string>(seqs[0]);
+  int L = (int)s0.size();
+  for (int i = 0; i < N; ++i) {
+    if ((int)std::string(as<std::string>(seqs[i])).size() != L)
+      stop("All sequences must have the same length");
+  }
+
+  // encode to uint8: N x L, row-major in a flat vector
+  std::vector<uint8_t> enc((size_t)N * L);
+  for (int i = 0; i < N; ++i) {
+    std::string s = as<std::string>(seqs[i]);
+    for (int p = 0; p < L; ++p) {
+      uint8_t c = code_char(s[p]);
+      if (c == 255) stop("Only A,C,G,T,N,? are allowed");
+      enc[(size_t)i * L + p] = c;
+    }
+  }
+
+  IntegerMatrix Mmatch(N, N); // zero-initialized
+
+  for (int p = 0; p < L; ++p) {
+    std::vector<int> A, Cb, G, Tt, Ns, Q;
+    A.reserve(N); Cb.reserve(N); G.reserve(N); Tt.reserve(N); Ns.reserve(N); Q.reserve(N);
+
+    // bucket row indices by symbol at column p
+    for (int i = 0; i < N; ++i) {
+      switch (enc[(size_t)i * L + p]) {
+        case 0: A.push_back(i);  break;
+        case 1: Cb.push_back(i); break;
+        case 2: G.push_back(i);  break;
+        case 3: Tt.push_back(i); break;
+        case 4: Ns.push_back(i); break;
+        case 5: Q.push_back(i);  break;
+      }
+    }
+
+    // column-major safe updaters: iterate over j (column) outermost,
+    // then use a pointer to column j and bump rows i.
+    auto bump_within = [&](const std::vector<int>& v){
+      int m = (int)v.size();
+      for (int jj = 0; jj < m; ++jj) {
+        int j = v[jj];
+        int* colj = &Mmatch(0, j);        // pointer to column j
+        for (int ii = 0; ii < m; ++ii) {
+          colj[v[ii]] += 1;               // increment (i = v[ii], j)
+        }
+      }
+    };
+
+    auto bump_pairs = [&](const std::vector<int>& X, const std::vector<int>& Y){
+      int mx = (int)X.size(), my = (int)Y.size();
+      for (int jj = 0; jj < my; ++jj) {
+        int j = Y[jj];
+        int* colj = &Mmatch(0, j);        // pointer to column j
+        for (int ii = 0; ii < mx; ++ii) {
+          colj[X[ii]] += 1;               // increment (i = X[ii], j)
+        }
+      }
+    };
+
+    // same known base
+    bump_within(A);  bump_within(Cb);  bump_within(G);  bump_within(Tt);
+
+    // N with known (both directions)
+    std::vector<int> K; K.reserve(A.size()+Cb.size()+G.size()+Tt.size());
+    K.insert(K.end(), A.begin(),  A.end());
+    K.insert(K.end(), Cb.begin(), Cb.end());
+    K.insert(K.end(), G.begin(),  G.end());
+    K.insert(K.end(), Tt.begin(), Tt.end());
+    bump_pairs(Ns, K);
+    bump_pairs(K, Ns);
+
+    // ? with ?
+    bump_within(Q);
+  }
+
+  // convert matches -> distances
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      Mmatch(i, j) = L - Mmatch(i, j);
+    }
+    Mmatch(i, i) = 0;
+  }
+  return Mmatch;
+}
diff --git a/tests/testthat/test_clone.R b/tests/testthat/test_clone.R
index 2293311..49a30c7 100644
--- a/tests/testthat/test_clone.R
+++ b/tests/testthat/test_clone.R
@@ -70,6 +70,462 @@ test_that("Test hierarchicalClones", {
     }
 })
 
+#### clone - hierarchicalClones with IUPAC parameter ####
+
+test_that("Test hierarchicalClones with IUPAC parameter", {
+    # IUPAC and max_n serve different purposes:
+    # - IUPAC: Controls (1) validation (which characters allowed) and 
+    #          (2) distance calculation method (fast Hamming vs IUPAC-aware scoring)
+    # - max_n: Filters sequences by counting non-ATCG characters using regex "[^ATCG]"
+    #          (includes N, ?, and all IUPAC codes - whatever passed validation)
+    #
+    # Processing order: Validation -> Filtering -> Distance calculation
+    #
+    # Key use cases tested:
+    # 1. IUPAC=FALSE, max_n=0: Strict ATCG-only mode with fast distance calculation
+    # 2. IUPAC=TRUE, max_n=0: IUPAC-aware distance (but filters out IUPAC seqs anyway)
+    # 3. IUPAC=TRUE, max_n=1+: Main use case - IUPAC codes with proper scoring
+    # 4. IUPAC=FALSE, max_n=1+: Allow N/? but reject IUPAC codes (validation blocks them)
+    # 5. IUPAC=FALSE/TRUE with standard bases: Backward compatibility (same results)
+    
+    # Create test data with IUPAC ambiguity codes
+    # With threshold=0.15 and length 15, distance > 0.15 requires >=3 mutations (3/15=0.2)
+    db_iupac <- data.frame(
+        sequence_id = paste0("seq", 1:12),
+        v_call = rep("IGHV1-1*01", 12),
+        j_call = rep("IGHJ1*01", 12),
+        # Use IUPAC ambiguity codes: R(A/G), Y(C/T), W(A/T), S(C/G), M(A/C), K(G/T)
+        junction = c(
+            # Clone 1: sequences similar to TGTGCAAGCTACTGG (0-1 mutations apart)
+            "TGTGCRAGCTACTGG", # R = A or G at pos 5
+            "TGTGCRAGCTACTGG", # identical to seq1
+            "TGTGCAAGCTACTGG", # standard bases only
+            "TGTGCYAGCTACTGG", # Y = C or T at pos 5, 1 mutation from seq3
+            "TGTGCYAGCTACTGG", # identical to seq4
+            "TGTGCMAGCTACTGG", # M = A or C at pos 5, 1 mutation from seq3
+            "TGTGCWAGCTACTGG", # W = A or T at pos 5, 1 mutation from seq3
+            
+            # Clone 2: sequences similar to ACGTTTGGCCAAACC (3+ mutations from Clone 1)
+            "ACGTTTGGCCAAACC", # standard bases, distant from Clone 1
+            "ACGTTTGGCCAAACC", # identical to seq8
+            "ACGKTTGGCCAAACC", # K = G or T at pos 4, 1 mutation from seq8
+            "ACGRTTGGCCAAACC", # R = A or G at pos 4, 1 mutation from seq8
+            "ACGTTTSGCCAAACC"  # S = C or G at pos 7, 1 mutation from seq8
+        ),
+        locus = rep("IGH", 12),
+        stringsAsFactors = FALSE
+    )
+
+    # Test 1: IUPAC=FALSE should reject IUPAC characters at validation stage
+    # Validation (IUPAC=FALSE): Only allows A,T,C,G,N,? - REJECTS IUPAC codes (R,Y,W,S,M,K,etc)
+    # Result: Sequences with IUPAC codes fail validation and raise error before any filtering
+    # Use case: Strict ATCG-only mode with fast Hamming distance (fastDist_rcpp)
+    expect_error(
+        hierarchicalClones(db_iupac,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = FALSE,  # Use fast Hamming distance for ATCG only
+            summarize_clones = FALSE
+        ),
+        "Invalid sequence characters"
+    )
+
+    # Test 2: IUPAC=TRUE with max_n=0 - validates with IUPAC but filters them out
+    # 1. Validation (IUPAC=TRUE): Allows standard bases, N, ?, and IUPAC codes - all sequences pass
+    # 2. Filtering (max_n=0): Counts non-ATCG using "[^ATCG]" - removes all sequences with any non-ATCG
+    # Result: Only seq3, seq8 and seq9 (standard bases only) remain after filtering
+    expect_warning(
+        expect_message(
+            db_result2 <- hierarchicalClones(db_iupac,
+                threshold = 0.15,
+                method = "nt", linkage = "single",
+                junction = "junction",
+                v_call = "v_call", j_call = "j_call",
+                IUPAC = TRUE,   # Use IUPAC-aware distance calculation
+                max_n = 0,      # Filter out sequences with ANY non-ATCG chars
+                summarize_clones = FALSE
+            ),
+            "Running defineClonesScoper in bulk mode and only keep heavy chains"
+        ),
+        "Removed 9 sequences with non ATCG characters."
+    )
+
+    # Only 3 sequences with standard bases remain
+    expect_equal(nrow(db_result2), 3)
+    expect_equal(sort(db_result2$sequence_id), c("seq3", "seq8", "seq9"))
+    # They are in different clones (distant sequences)
+    expect_equal(length(unique(db_result2$clone_id)), 2)
+
+    # Test 3a: Create dataset with N characters to distinguish from IUPAC codes
+    db_with_n <- db_iupac
+    db_with_n[13,] <- NA
+    db_with_n$junction[13] <- "TGTGCNAGCTACTGG"  # Add seq13 with N
+    db_with_n$sequence_id <- c(db_with_n$sequence_id[1:12], "seq13")
+    db_with_n$v_call <- c(db_with_n$v_call[1:12], "IGHV1-1*01")
+    db_with_n$j_call <- c(db_with_n$j_call[1:12], "IGHJ1*01")
+    db_with_n$locus <- c(db_with_n$locus[1:12], "IGH")
+    
+    # Test 3b: IUPAC=TRUE with max_n=1 - allows up to 1 non-ATCG character per sequence
+    # This is the main use case: proper IUPAC distance calculation with ambiguity codes
+    # 1. Validation (IUPAC=TRUE): Allows standard bases (A,T,C,G) plus N, ?, and IUPAC codes (R,Y,W,S,M,K,etc)
+    # 2. Filtering (max_n=1): Counts non-ATCG using "[^ATCG]" - keeps sequences with ≤1 such character
+    # 3. Distance: Uses IUPAC-aware scoring (alakazam::pairwiseDist) for proper ambiguity handling
+    expect_message(
+        db_result3 <- hierarchicalClones(db_with_n,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = TRUE,   # Use IUPAC-aware distance calculation
+            max_n = 1,      # Allow up to 1 non-ATCG character (IUPAC or N)
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+    
+    # All 13 sequences kept (each has ≤1 non-ATCG character)
+    expect_equal(nrow(db_result3), 13)
+    
+    # Verify that we have 2 different clusters (Clone 1 and Clone 2)
+    n_clones <- length(unique(db_result3$clone_id))
+    expect_equal(n_clones, 2)
+    
+    # Identical sequences with IUPAC codes should cluster together
+    # Note: rows may be reordered, so reference by sequence_id
+    get_clone <- function(seq_id) db_result3$clone_id[db_result3$sequence_id == seq_id]
+    
+    expect_equal(get_clone("seq1"), get_clone("seq2"))  # seq1=seq2 (both R)
+    expect_equal(get_clone("seq4"), get_clone("seq5"))  # seq4=seq5 (both Y)
+    expect_equal(get_clone("seq8"), get_clone("seq9"))  # seq8=seq9 (standard)
+    
+    # seq13 (with N) should cluster with Clone 1 (similar to seq3)
+    expect_equal(get_clone("seq3"), get_clone("seq13"))
+    
+    # Verify two distinct clones based on biological distance
+    clone1_id <- get_clone("seq3")  # seq3 in Clone 1
+    clone2_id <- get_clone("seq8")  # seq8 in Clone 2
+    expect_true(clone1_id != clone2_id)
+    expect_true(all(sapply(paste0("seq", 1:7), get_clone) == clone1_id))   # Clone 1: seq1-7
+    expect_true(all(sapply(paste0("seq", 8:12), get_clone) == clone2_id))  # Clone 2: seq8-12
+    expect_true(get_clone("seq13") == clone1_id)                           # seq13 joins Clone 1
+
+    # Test 4: IUPAC=FALSE with max_n=1+ - allows N/? but rejects other IUPAC codes
+    # This demonstrates the two-stage process: validation -> filtering
+    # 1. Validation (IUPAC=FALSE): Allows A,T,C,G,N,? but REJECTS IUPAC codes (R,Y,W,S,M,K,etc)
+    # 2. Filtering (max_n): Counts non-ATCG using "[^ATCG]" regex on sequences that passed validation
+    # Use case: Tolerate low-quality positions (N,?) with fast Hamming distance, but no ambiguity codes
+    db_with_n_only <- data.frame(
+        sequence_id = paste0("seq", 1:5),
+        v_call = rep("IGHV1-1*01", 5),
+        j_call = rep("IGHJ1*01", 5),
+        junction = c(
+            "TGTGCAAGCTACTGG",  # standard bases only
+            "TGTGCNAGCTACTGG",  # 1 N (allowed with max_n=1)
+            "TGTGC?AGCTACTGG",  # 1 ? (allowed with max_n=1)
+            "TGTGCNNGCTACTGG",  # 2 N's (filtered with max_n=1)
+            "TGTGCRAGCTACTGG"   # IUPAC R code (should fail validation)
+        ),
+        locus = rep("IGH", 5),
+        stringsAsFactors = FALSE
+    )
+    
+    # Should reject at validation stage due to IUPAC code (R) in seq5
+    expect_error(
+        hierarchicalClones(db_with_n_only,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = FALSE,  # Reject IUPAC codes
+            max_n = 1,      # Allow up to 1 N or ?
+            summarize_clones = FALSE
+        ),
+        "Invalid sequence characters"
+    )
+    
+    # Test with only N/? (no IUPAC codes) - should succeed
+    db_n_only_valid <- db_with_n_only[1:4, ]  # Remove seq5 with IUPAC code
+    
+    expect_warning(
+        expect_message(
+            db_result5 <- hierarchicalClones(db_n_only_valid,
+                threshold = 0.15,
+                method = "nt", linkage = "single",
+                junction = "junction",
+                v_call = "v_call", j_call = "j_call",
+                IUPAC = FALSE,  # Use fast Hamming distance
+                max_n = 1,      # Allow up to 1 N or ?
+                summarize_clones = FALSE
+            ),
+            "Running defineClonesScoper in bulk mode and only keep heavy chains"
+        ),
+        "Removed 1 sequences with non ATCG characters."  # seq4 with 2 N's filtered
+    )
+    
+    # Should keep seq1, seq2, seq3 (0-1 non-ATCG characters each)
+    expect_equal(nrow(db_result5), 3)
+    expect_equal(sort(db_result5$sequence_id), c("seq1", "seq2", "seq3"))
+    # All are similar sequences, should be in same clone
+    expect_equal(length(unique(db_result5$clone_id)), 1)
+
+    # Test 5: Standard bases work with both IUPAC=TRUE and IUPAC=FALSE
+    # This demonstrates backward compatibility and performance consideration:
+    # - IUPAC=FALSE: Uses fast Hamming distance (fastDist_rcpp) - faster
+    # - IUPAC=TRUE: Uses IUPAC-aware scoring (alakazam::pairwiseDist) - slower but handles ambiguity
+    # For standard bases (ATCG), results should be identical
+    db_standard <- ExampleDb[1:50, ]
+
+    # Verify db_standard contains only standard bases (not IUPAC ambiguity codes)
+    standard_chars <- c("A", "T", "C", "G", "N", "?")
+    .is_standard <- function(x) {
+        all(unique(strsplit(x, "")[[1]]) %in% standard_chars)
+    }
+    all_standard <- sapply(db_standard$junction, .is_standard)
+    expect_true(all(all_standard),
+        info = "ExampleDb contains IUPAC ambiguity codes - update test data or test expectations"
+    )
+
+    expect_message(
+        db_result_false <- hierarchicalClones(db_standard,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = FALSE,  # Fast Hamming distance for ATCG only
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+
+    expect_message(
+        db_result_true <- hierarchicalClones(db_standard,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = TRUE,   # IUPAC-aware distance (slower but handles ambiguity)
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+
+    # Results should be identical for standard bases
+    # (IUPAC just changes the distance calculation method, not the result)
+    expect_equal(nrow(db_result_false), nrow(db_result_true))
+    expect_true(all(!is.na(db_result_false$clone_id)))
+    expect_true(all(!is.na(db_result_true$clone_id)))
+    # Clone assignments should be the same (though IDs may differ)
+    expect_equal(length(unique(db_result_false$clone_id)),
+                 length(unique(db_result_true$clone_id)))
+    
+    # Test 6: IUPAC=TRUE with max_n=NULL - no filtering, process all sequences
+    # This is the most permissive option for IUPAC data
+    # 1. Validation (IUPAC=TRUE): Allows standard bases, N, ?, and all IUPAC codes
+    # 2. Filtering (max_n=NULL): No filtering - all sequences kept regardless of character count
+    # 3. Distance: Uses IUPAC-aware scoring for proper ambiguity handling
+    # Use case: Data with extensive IUPAC codes where you want to process everything
+    
+    # Create test data with multiple IUPAC codes per sequence
+    db_iupac_multi <- data.frame(
+        sequence_id = paste0("seq", 1:8),
+        v_call = rep("IGHV1-1*01", 8),
+        j_call = rep("IGHJ1*01", 8),
+        junction = c(
+            # Clone 1: sequences with multiple IUPAC codes, similar to each other
+            "TGTGCRAGCTRYCTGG",  # R at pos 6, R at pos 12, Y at pos 13 (3 non-ATCG)
+            "TGTGCRAGCTRYCTGG",  # identical to seq1
+            "TGTGCRAGCTWYNMGG",  # R at pos 6, W at pos 12, Y at pos 13, N at pos 14, M at pos 15 (5 non-ATCG)
+            "TGTGCAAGCTACCTGG",  # standard bases only (0 non-ATCG)
+            # Clone 2: sequences distant from Clone 1, also with IUPAC codes
+            "ACGKTTRGCCNNNYYY",  # K, R, 3 N's, 3 Y's (8 non-ATCG characters!)
+            "ACGKTTRGCCNNNYYY",  # identical to seq5
+            "ACGGTTAGCCAAACCC",  # standard bases only (0 non-ATCG)
+            "ACGKTTSGCCNNNYYY"   # K, S, 3 N's, 3 Y's (8 non-ATCG)
+        ),
+        locus = rep("IGH", 8),
+        stringsAsFactors = FALSE
+    )
+    
+    # With max_n=NULL, all sequences should be kept regardless of IUPAC code count
+    expect_message(
+        db_result6 <- hierarchicalClones(db_iupac_multi,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = TRUE,     # Use IUPAC-aware distance calculation
+            max_n = NULL,     # No filtering - process all sequences
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+    
+    # All 8 sequences kept (no filtering applied)
+    expect_equal(nrow(db_result6), 8)
+    expect_equal(sort(db_result6$sequence_id), paste0("seq", 1:8))
+    
+    # Should have 2 clones based on biological distance
+    expect_equal(length(unique(db_result6$clone_id)), 2)
+    
+    # Helper function to get clone ID by sequence ID
+    get_clone6 <- function(seq_id) db_result6$clone_id[db_result6$sequence_id == seq_id]
+    
+    # Verify identical sequences cluster together despite many IUPAC codes
+    expect_equal(get_clone6("seq1"), get_clone6("seq2"))  # both identical with 3 IUPAC
+    expect_equal(get_clone6("seq5"), get_clone6("seq6"))  # both identical with 8 IUPAC
+    
+    # Verify Clone 1 (seq1-4) and Clone 2 (seq5-8) are distinct
+    clone1_id <- get_clone6("seq1")
+    clone2_id <- get_clone6("seq5")
+    expect_true(clone1_id != clone2_id)
+    expect_true(all(sapply(paste0("seq", 1:4), get_clone6) == clone1_id))
+    expect_true(all(sapply(paste0("seq", 5:8), get_clone6) == clone2_id))
+    
+})
+
+#### clone - hierarchicalClones, IUPAC code in sequence 1001 ####
+
+test_that("Test hierarchicalClones with IUPAC code beyond validation window", {
+    # Validation only inspects the first 1000 sequences (head(..., 1000)).
+    # A sequence containing an IUPAC code at position 1001 therefore escapes the check
+    # regardless of IUPAC=TRUE/FALSE.
+    # What happens next depends on max_n and IUPAC:
+    #   - max_n=0:    prepare_db filters out non-ATCG sequences -> warning, no error (both IUPAC=T/F)
+    #   - max_n=NULL, IUPAC=FALSE: no filtering -> fastDist_rcpp encounters R -> error
+    #   - max_n=NULL, IUPAC=TRUE:  no filtering -> pairwiseDist handles R correctly -> succeeds
+
+    # Build 1001-row dataset: rows 1-1000 are standard ATCG, row 1001 has IUPAC code R
+    n_std <- 1000
+    db_iupac_1001 <- data.frame(
+        sequence_id = paste0("seq", seq_len(n_std + 1)),
+        v_call      = rep("IGHV1-1*01", n_std + 1),
+        j_call      = rep("IGHJ1*01",   n_std + 1),
+        junction    = c(rep("TGTGCAAGCTACTGG", n_std),  # rows 1-1000: ATCG only
+                        "TGTGCRAGCTACTGG"),              # row 1001: R is an IUPAC code
+        locus       = rep("IGH", n_std + 1),
+        stringsAsFactors = FALSE
+    )
+
+    # Confirm row 1001 is the one with the IUPAC code (sanity check)
+    expect_equal(db_iupac_1001$junction[1001], "TGTGCRAGCTACTGG")
+
+    # Same data in reverse order to put IUPAC code in row 1
+    db_iupac_1 <- db_iupac_1001[1001:1, ]
+
+    # Test 1: IUPAC=FALSE, max_n=0 (default)
+    # - Validation (first 1000 rows): passes - no IUPAC codes in rows 1-1000
+    # - max_n=0 filtering: removes row 1001 (R is non-ATCG) and issues a warning
+    # - Result: runs successfully; row 1001 removed
+    expect_warning(
+        expect_message(
+            db_result1 <- hierarchicalClones(db_iupac_1001,
+                threshold = 0.15,
+                method = "nt", linkage = "single",
+                junction = "junction",
+                v_call = "v_call", j_call = "j_call",
+                IUPAC = FALSE,
+                max_n = 0,
+                summarize_clones = FALSE
+            ),
+            "Running defineClonesScoper in bulk mode and only keep heavy chains"
+        ),
+        "Removed 1 sequences with non ATCG characters."
+    )
+    expect_equal(nrow(db_result1), n_std)
+    expect_false("seq1001" %in% db_result1$sequence_id)
+
+    # Same data in reverse order now throws an error. Potentially confusing for users?
+    expect_error(
+        db_result1 <- hierarchicalClones(db_iupac_1,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = FALSE,
+            max_n = 0,
+            summarize_clones = FALSE
+        ),
+        "Invalid sequence characters in the junction column"
+    )
+
+    # Test 2: IUPAC=FALSE, max_n=NULL
+    # - Validation (first 1000 rows): passes - no IUPAC codes in rows 1-1000
+    # - max_n=NULL: no filtering -> row 1001 reaches fastDist_rcpp
+    # - fastDist_rcpp stops with an error because R is not A,C,G,T,N,?
+    expect_error(
+        suppressMessages(
+            hierarchicalClones(db_iupac_1001,
+                threshold = 0.15,
+                method = "nt", linkage = "single",
+                junction = "junction",
+                v_call = "v_call", j_call = "j_call",
+                IUPAC = FALSE,
+                max_n = NULL,
+                summarize_clones = FALSE
+            )
+        ),
+        "Only A,C,G,T,N,\\? are allowed"
+    )
+
+    # Same data in reverse also throws an error.
+    expect_error(
+        suppressMessages(
+            hierarchicalClones(db_iupac_1,
+                threshold = 0.15,
+                method = "nt", linkage = "single",
+                junction = "junction",
+                v_call = "v_call", j_call = "j_call",
+                IUPAC = FALSE,
+                max_n = NULL,
+                summarize_clones = FALSE
+            )
+        ),
+        "Invalid sequence characters in the junction column"
+    )
+
+    # Test 3: IUPAC=TRUE, max_n=NULL
+    # - Validation (first 1000 rows): passes
+    # - max_n=NULL: no filtering -> row 1001 is kept
+    # - Distance: alakazam::pairwiseDist handles IUPAC codes correctly -> no error
+    # - Result: all 1001 sequences processed; seq1001 included in output
+    expect_message(
+        db_result3 <- hierarchicalClones(db_iupac_1001,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = TRUE,
+            max_n = NULL,
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+    expect_equal(nrow(db_result3), n_std + 1)
+    expect_true("seq1001" %in% db_result3$sequence_id)
+    # seq1001 (TGTGCRAGCTACTGG) is within distance 0.15 of the standard sequences
+    # (TGTGCAAGCTACTGG): R matches A under IUPAC scoring -> same clone
+    expect_equal(length(unique(db_result3$clone_id)), 1)
+
+    # Same data in reverse order gives the same result (IUPAC=TRUE allows R, max_n=NULL means no filtering)
+    expect_message(
+        db_result4 <- hierarchicalClones(db_iupac_1,
+            threshold = 0.15,
+            method = "nt", linkage = "single",
+            junction = "junction",
+            v_call = "v_call", j_call = "j_call",
+            IUPAC = TRUE,
+            max_n = NULL,
+            summarize_clones = FALSE
+        ),
+        "Running defineClonesScoper in bulk mode and only keep heavy chains"
+    )
+    # Results should be identical regardless of row order
+    db_result4_ordered <- db_result4[match(db_result3$sequence_id, db_result4$sequence_id), ]
+    expect_equal(db_result3, db_result4_ordered,  check.attributes = FALSE)
+})
+
 #### clone - spectralClones - novj method ####
 
 test_that("Test spectralClones - novj", {
@@ -407,7 +863,7 @@ test_that("Test hierarchicalClones only_heavy and first", {
     # expect sequences from cells 1 and 3 in clone 1, and
     # sequences from cell 2 in clone 2
     clones_split_F_th0@db %>%
-        mutate(expected_clone_id = dplyr::case_when(
+        dplyr::mutate(expected_clone_id = dplyr::case_when(
             cell_id %in% c(1, 3) ~ "1",
             cell_id %in% c(2) ~ "2",
             TRUE ~ NA_character_
@@ -484,7 +940,7 @@ test_that("Test hierarchicalClones only_heavy and first", {
     # sequences from cell 2 in clone 2, regardless of
     # light chains
     clones_split_F_th0_2l@db %>%
-        mutate(expected_clone_id = dplyr::case_when(
+        dplyr::mutate(expected_clone_id = dplyr::case_when(
             cell_id %in% c(1, 3) ~ "1",
             cell_id %in% c(2) ~ "2",
             TRUE ~ NA_character_
@@ -558,3 +1014,5 @@ test_that("Testing split_light warnings for all cloning mehtods", {
                                 cell_id = "cell_id", split_light = TRUE))
    expect_equal(db_nsplit[['clone_id']], db_split[['clone_id']])
 })
+
+