From 502993751ee1e059953fbc6494fca1fd7647e9dd Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Tue, 8 Apr 2025 11:47:05 -0400
Subject: [PATCH 01/35] add update rate
---
R/buildPedigree.R | 15 +-
R/convertPedigree.R | 2 +-
R/makeLinks.R | 217 ++++++++++++-----------
R/readGedcom.R | 82 +++++----
data-raw/benchmark.R | 4 +-
data-raw/df_inbreeding.R | 2 +-
data-raw/df_potter.R | 8 +-
man/collapseNames.Rd | 16 ++
man/readGedcom.Rd | 3 +
tests/testthat/test-computeRelatedness.R | 13 +-
tests/testthat/test-makeLinks.R | 3 +-
tests/testthat/test-readPedigrees.R | 10 +-
tests/testthat/test-summarizePedigrees.R | 24 ++-
13 files changed, 235 insertions(+), 164 deletions(-)
create mode 100644 man/collapseNames.Rd
diff --git a/R/buildPedigree.R b/R/buildPedigree.R
index 3d94ecad..98dfbaa3 100644
--- a/R/buildPedigree.R
+++ b/R/buildPedigree.R
@@ -88,7 +88,8 @@ ped2graph <- function(ped,
...) {
# Check ped/data.fram
if (!inherits(ped, "data.frame")) {
- stop("ped should be a data.frame or inherit to a data.frame")}
+ stop("ped should be a data.frame or inherit to a data.frame")
+ }
# Handle adjacent argument
adjacent <- match.arg(tolower(adjacent)[1],
choices = c(
@@ -182,8 +183,10 @@ ped2maternal <- function(ped, personID = "ID",
momID = "momID", dadID = "dadID",
matID = "matID", ...) {
# Call to wrapper function
- .ped2id(ped = ped, personID = personID, momID = momID,
- dadID = dadID, famID = matID, type = "mothers")
+ .ped2id(
+ ped = ped, personID = personID, momID = momID,
+ dadID = dadID, famID = matID, type = "mothers"
+ )
}
#' Add a paternal line ID variable to a pedigree
@@ -203,6 +206,8 @@ ped2paternal <- function(ped, personID = "ID",
momID = "momID", dadID = "dadID",
patID = "patID", ...) {
# Call to wrapper function
- .ped2id(ped = ped, personID = personID, momID = momID,
- dadID = dadID, famID = patID, type = "fathers")
+ .ped2id(
+ ped = ped, personID = personID, momID = momID,
+ dadID = dadID, famID = patID, type = "fathers"
+ )
}
diff --git a/R/convertPedigree.R b/R/convertPedigree.R
index 0f454acc..9cab93f0 100644
--- a/R/convertPedigree.R
+++ b/R/convertPedigree.R
@@ -224,7 +224,7 @@ ped2com <- function(ped, component,
} else {
# isChild is the 'S' matrix from RAM
- isChild <- isChild(isChild_method=isChild_method, ped=ped)
+ isChild <- isChild(isChild_method = isChild_method, ped = ped)
if (saveable) {
saveRDS(isChild, file = checkpoint_files$isChild)
diff --git a/R/makeLinks.R b/R/makeLinks.R
index 96768fd5..d2dc6a65 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -50,23 +50,28 @@ com2links <- function(
}
# Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
if (!is.null(ad_ped_matrix)) {
- ad_ped_matrix <- validate_and_convert_matrix(mat=ad_ped_matrix,
- name = "ad_ped_matrix")
+ ad_ped_matrix <- validate_and_convert_matrix(
+ mat = ad_ped_matrix,
+ name = "ad_ped_matrix"
+ )
}
# Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
if (!is.null(cn_ped_matrix)) {
- cn_ped_matrix <- validate_and_convert_matrix(mat=cn_ped_matrix,
- name="cn_ped_matrix",
- ensure_symmetric = TRUE)
+ cn_ped_matrix <- validate_and_convert_matrix(
+ mat = cn_ped_matrix,
+ name = "cn_ped_matrix",
+ ensure_symmetric = TRUE
+ )
}
# Validate and process mit_ped_matrix: convert and ensure binary values.
if (!is.null(mit_ped_matrix)) {
-
- mit_ped_matrix <- validate_and_convert_matrix(mat=mit_ped_matrix,
- name="mit_ped_matrix",force_binary = TRUE,
- ensure_symmetric = TRUE)
+ mit_ped_matrix <- validate_and_convert_matrix(
+ mat = mit_ped_matrix,
+ name = "mit_ped_matrix", force_binary = TRUE,
+ ensure_symmetric = TRUE
+ )
}
# --- Build IDs and Prepare Matrix Pointers ---
@@ -516,109 +521,117 @@ com2links.legacy <- function(
verbose = FALSE,
outcome_name = "data",
...) {
- # --- Legacy Mode ---
- if (verbose) {
- message("Using legacy mode")
+ # --- Legacy Mode ---
+ if (verbose) {
+ message("Using legacy mode")
+ }
+ # In legacy mode, convert matrices to the expected symmetric formats.
+
+ # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata'))
+ # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix")
+ # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata'))
+ # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata'))
+
+ # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv')
+
+ biggestMtPed <- mit_ped_matrix
+ remove(mit_ped_matrix)
+ biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix")
+ remove(cn_ped_matrix)
+ biggestPed <- ad_ped_matrix
+ remove(ad_ped_matrix)
+ biggestMtPed@x[biggestMtPed@x > 0] <- 1
+
+ # Set the output file name.
+ if (exists("rel_pairs_file")) {
+ fname <- rel_pairs_file
+ } else {
+ fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv")
+ }
+ # Initialize the output file with headers.
+ ds <- data.frame(
+ ID1 = numeric(0), ID2 = numeric(0),
+ addRel = numeric(0),
+ mitRel = numeric(0), cnuRel = numeric(0)
+ )
+
+ utils::write.table(ds,
+ file = fname, sep = ",",
+ append = FALSE, row.names = FALSE
+ )
+
+ # Extract IDs from the common nuclear matrix.
+ ids <- as.numeric(dimnames(biggestCnPed)[[1]])
+
+ # Extract pointers from the legacy matrices.
+ newColPos1 <- biggestPed@p + 1L
+ iss1 <- biggestPed@i + 1L
+ newColPos2 <- biggestMtPed@p + 1L
+ iss2 <- biggestMtPed@i + 1L
+ newColPos3 <- biggestCnPed@p + 1L
+ iss3 <- biggestCnPed@i + 1L
+ nc <- ncol(biggestPed)
+
+ # Process each individual.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+ ncp1 <- newColPos1[j]
+ ncp1p <- newColPos1[j + 1L]
+ cond1 <- ncp1 < ncp1p
+ if (cond1) {
+ vv1 <- ncp1:(ncp1p - 1L)
+ iss1vv <- iss1[vv1]
}
- # In legacy mode, convert matrices to the expected symmetric formats.
-
- # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata'))
- # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix")
- # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata'))
- # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata'))
-
- # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv')
-
- biggestMtPed <- mit_ped_matrix
- remove(mit_ped_matrix)
- biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix")
- remove(cn_ped_matrix)
- biggestPed <- ad_ped_matrix
- remove(ad_ped_matrix)
- biggestMtPed@x[biggestMtPed@x > 0] <- 1
-
- # Set the output file name.
- if (exists("rel_pairs_file")) {
- fname <- rel_pairs_file
- } else {
- fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv")
+ ncp2 <- newColPos2[j]
+ ncp2p <- newColPos2[j + 1L]
+ cond2 <- ncp2 < ncp2p
+ if (cond2) {
+ vv2 <- ncp2:(ncp2p - 1L)
+ iss2vv <- iss2[vv2]
+ }
+ ncp3 <- newColPos3[j]
+ ncp3p <- newColPos3[j + 1L]
+ cond3 <- ncp3 < ncp3p
+ if (cond3) {
+ vv3 <- ncp3:(ncp3p - 1L)
+ iss3vv <- iss3[vv3]
}
- # Initialize the output file with headers.
- ds <- data.frame(ID1 = numeric(0), ID2 = numeric(0),
- addRel = numeric(0),
- mitRel = numeric(0), cnuRel = numeric(0))
-
- utils::write.table(ds, file = fname, sep = ",",
- append = FALSE, row.names = FALSE)
-
- # Extract IDs from the common nuclear matrix.
- ids <- as.numeric(dimnames(biggestCnPed)[[1]])
-
- # Extract pointers from the legacy matrices.
- newColPos1 <- biggestPed@p + 1L
- iss1 <- biggestPed@i + 1L
- newColPos2 <- biggestMtPed@p + 1L
- iss2 <- biggestMtPed@i + 1L
- newColPos3 <- biggestCnPed@p + 1L
- iss3 <- biggestCnPed@i + 1L
- nc <- ncol(biggestPed)
-
- # Process each individual.
- for (j in 1L:nc) {
- ID2 <- ids[j]
- ncp1 <- newColPos1[j]
- ncp1p <- newColPos1[j + 1L]
- cond1 <- ncp1 < ncp1p
+
+ # Merge indices from all three matrices.
+ u <- sort(igraph::union(igraph::union(if (cond1) {
+ iss1vv
+ }, if (cond2) {
+ iss2vv
+ }), if (cond3) {
+ iss3vv
+ }))
+ # browser()
+ if (cond1 || cond2 || cond3) {
+ ID1 <- ids[u]
+ tds <- data.frame(
+ ID1 = ID1, ID2 = ID2,
+ addRel = 0, mitRel = 0, cnuRel = 0
+ )
if (cond1) {
- vv1 <- ncp1:(ncp1p - 1L)
- iss1vv <- iss1[vv1]
+ tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1]
}
- ncp2 <- newColPos2[j]
- ncp2p <- newColPos2[j + 1L]
- cond2 <- ncp2 < ncp2p
if (cond2) {
- vv2 <- ncp2:(ncp2p - 1L)
- iss2vv <- iss2[vv2]
+ tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2]
}
- ncp3 <- newColPos3[j]
- ncp3p <- newColPos3[j + 1L]
- cond3 <- ncp3 < ncp3p
if (cond3) {
- vv3 <- ncp3:(ncp3p - 1L)
- iss3vv <- iss3[vv3]
- }
-
- # Merge indices from all three matrices.
- u <- sort(igraph::union(igraph::union(if (cond1) {
- iss1vv
- }, if (cond2) {
- iss2vv
- }), if (cond3) {
- iss3vv
- }))
- # browser()
- if (cond1 || cond2 || cond3) {
- ID1 <- ids[u]
- tds <- data.frame(ID1 = ID1, ID2 = ID2,
- addRel = 0, mitRel = 0, cnuRel = 0)
- if (cond1) {
- tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1]
- }
- if (cond2) {
- tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2]
- }
- if (cond3) {
- tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3]
- }
- utils::write.table(tds, file = fname, row.names = FALSE,
- col.names = FALSE, append = TRUE, sep = ",")
- }
- if (!(j %% update_rate)) {
- cat(paste0("Done with ", j, " of ", nc, "\n"))
+ tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3]
}
+ utils::write.table(tds,
+ file = fname, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+ if (!(j %% update_rate)) {
+ cat(paste0("Done with ", j, " of ", nc, "\n"))
}
- return(NULL)
}
+ return(NULL)
+}
#' @title validate_and_convert_matrix
#' @description
diff --git a/R/readGedcom.R b/R/readGedcom.R
index d6858e58..f9066c91 100644
--- a/R/readGedcom.R
+++ b/R/readGedcom.R
@@ -9,6 +9,7 @@
#' @param combine_cols A logical value indicating whether to combine columns with duplicate values.
#' @param verbose A logical value indicating whether to print messages.
#' @param skinny A logical value indicating whether to return a skinny data frame.
+#' @param update_rate numeric. The rate at which to print progress
#' @param ... Additional arguments to be passed to the function.
#' @return A data frame containing information about individuals, with the following potential columns:
#' - `id`: ID of the individual
@@ -53,6 +54,7 @@ readGedcom <- function(file_path,
remove_empty_cols = TRUE,
combine_cols = TRUE,
skinny = FALSE,
+ update_rate = 1000,
...) {
# Checks
if (!file.exists(file_path)) stop("File does not exist: ", file_path)
@@ -73,11 +75,15 @@ readGedcom <- function(file_path,
identifiers = c("id", "momID", "dadID"),
names = c(
"name", "name_given", "name_given_pieces",
- "name_surn", "name_surn_pieces", "name_marriedsurn", "name_nick", "name_npfx", "name_nsfx"
+ "name_surn", "name_surn_pieces", "name_marriedsurn",
+ "name_nick", "name_npfx", "name_nsfx"
),
sex = c("sex"),
birth = c("birth_date", "birth_lat", "birth_long", "birth_place"),
- death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"),
+ death = c(
+ "death_caus", "death_date",
+ "death_lat", "death_long", "death_place"
+ ),
attributes = c(
"attribute_caste", "attribute_children", "attribute_description",
"attribute_education", "attribute_idnumber", "attribute_marriages",
@@ -91,7 +97,10 @@ readGedcom <- function(file_path,
all_var_names <- unlist(var_names, use.names = FALSE)
# Initialize all variables to NA
- vars <- stats::setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names)
+ vars <- stats::setNames(
+ as.list(rep(NA_character_, length(all_var_names))),
+ all_var_names
+ )
df_temp <- as.data.frame(matrix(nrow = 1, ncol = length(all_var_names)))
names(df_temp) <- all_var_names
@@ -107,7 +116,10 @@ readGedcom <- function(file_path,
df_temp <- rbind(df_temp, line_to_write)
# Reset all variables to NA
- vars <- stats::setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names)
+ vars <- stats::setNames(as.list(rep(
+ NA_character_,
+ length(all_var_names)
+ )), all_var_names)
vars$id <- stringr::str_extract(tmpv, "(?<=@.)\\d*(?=@)")
next
@@ -269,7 +281,7 @@ readGedcom <- function(file_path,
vars <- result$vars
if (result$matched) next
- if (verbose && i %% 1000 == 0) {
+ if (verbose && i %% update_rate == 0) {
cat("Processed", i, "lines\n")
}
}
@@ -296,32 +308,8 @@ readGedcom <- function(file_path,
df_temp <- processParents(df_temp, datasource = "gedcom")
}
-
-
if (combine_cols) {
- if (verbose) {
- print("Combining Duplicate Columns")
- }
- # need to check if any values aren't NA in name_given_pieces and name_surn_pieces
- # Combine `name_given` and `name_given_pieces`
-
- # Combine `name_given` and `name_given_pieces`
- if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) {
- result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces)
- df_temp$name_given <- result$combined
- if (!result$retain_col2) {
- df_temp$name_given_pieces <- NULL
- }
- }
-
- # Combine `name_surn` and `name_surn_pieces`
- if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) {
- result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces)
- df_temp$name_surn <- result$combined
- if (!result$retain_col2) {
- df_temp$name_surn_pieces <- NULL
- }
- }
+ df_temp <- collapseNames(verbose = verbose, df_temp = df_temp)
}
if (remove_empty_cols) {
@@ -565,3 +553,37 @@ process_tag <- function(tag, field_name, pattern_rows, line, vars,
}
return(list(vars = vars, matched = matched))
}
+
+#' collapse Names
+#'
+#' This function combines the `name_given` and `name_given_pieces` columns in a data frame.
+#'
+#' @inheritParams readGedcom
+#' @param df_temp A data frame containing the columns to be combined.
+
+collapseNames <- function(verbose, df_temp) {
+ if (verbose) {
+ print("Combining Duplicate Columns")
+ }
+ # need to check if any values aren't NA in name_given_pieces and name_surn_pieces
+ # Combine `name_given` and `name_given_pieces`
+
+ # Combine `name_given` and `name_given_pieces`
+ if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) {
+ result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces)
+ df_temp$name_given <- result$combined
+ if (!result$retain_col2) {
+ df_temp$name_given_pieces <- NULL
+ }
+ }
+
+ # Combine `name_surn` and `name_surn_pieces`
+ if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) {
+ result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces)
+ df_temp$name_surn <- result$combined
+ if (!result$retain_col2) {
+ df_temp$name_surn_pieces <- NULL
+ }
+ }
+ return(df_temp)
+}
diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R
index 8c4c90b5..fe480cdf 100644
--- a/data-raw/benchmark.R
+++ b/data-raw/benchmark.R
@@ -60,4 +60,6 @@ print(benchmark_results)
# Optional: Save results to CSV for later analysis
write.csv(summary(benchmark_results),
- "benchmark_results.csv", row.names = FALSE)
+ "benchmark_results.csv",
+ row.names = FALSE
+)
diff --git a/data-raw/df_inbreeding.R b/data-raw/df_inbreeding.R
index 9af10ff0..430830ff 100644
--- a/data-raw/df_inbreeding.R
+++ b/data-raw/df_inbreeding.R
@@ -8,5 +8,5 @@ inbreeding <- raw
##
# data processing
-#write.csv(inbreeding, "data-raw/inbreeding.csv", row.names = FALSE)
+# write.csv(inbreeding, "data-raw/inbreeding.csv", row.names = FALSE)
usethis::use_data(inbreeding, overwrite = TRUE, compress = "xz")
diff --git a/data-raw/df_potter.R b/data-raw/df_potter.R
index aa039c81..d1f5faf3 100644
--- a/data-raw/df_potter.R
+++ b/data-raw/df_potter.R
@@ -45,9 +45,11 @@ potter <- data.frame(
"Molly Weasley",
"Lucy Weasley"
),
- gen = c(1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3),
+ gen = c(
+ 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3
+ ),
momID = c(
101, 101, 103, 103, NA, 3, 4, 10, NA, NA,
10, 10, 10, 10, 10, 10, NA, 105, 105, NA,
diff --git a/man/collapseNames.Rd b/man/collapseNames.Rd
new file mode 100644
index 00000000..01a38a6c
--- /dev/null
+++ b/man/collapseNames.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/readGedcom.R
+\name{collapseNames}
+\alias{collapseNames}
+\title{collapse Names}
+\usage{
+collapseNames(verbose, df_temp)
+}
+\arguments{
+\item{verbose}{A logical value indicating whether to print messages.}
+
+\item{df_temp}{A data frame containing the columns to be combined.}
+}
+\description{
+This function combines the `name_given` and `name_given_pieces` columns in a data frame.
+}
diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd
index 78d45773..fdb158e1 100644
--- a/man/readGedcom.Rd
+++ b/man/readGedcom.Rd
@@ -11,6 +11,7 @@ readGedcom(
remove_empty_cols = TRUE,
combine_cols = TRUE,
skinny = FALSE,
+ update_rate = 1000,
...
)
}
@@ -27,6 +28,8 @@ readGedcom(
\item{skinny}{A logical value indicating whether to return a skinny data frame.}
+\item{update_rate}{numeric. The rate at which to print progress}
+
\item{...}{Additional arguments to be passed to the function.}
}
\value{
diff --git a/tests/testthat/test-computeRelatedness.R b/tests/testthat/test-computeRelatedness.R
index 05fb7e62..192df30b 100644
--- a/tests/testthat/test-computeRelatedness.R
+++ b/tests/testthat/test-computeRelatedness.R
@@ -28,11 +28,13 @@ test_that("calculateRelatedness function with empirical", {
# Test 7: empirical divide by zero
test_that("calculateH handles divide by zero for empirical", {
-expect_error(
- calculateRelatedness(generations = 2,
- empirical = TRUE, total_a = 0,
- total_m = 0))
-
+ expect_error(
+ calculateRelatedness(
+ generations = 2,
+ empirical = TRUE, total_a = 0,
+ total_m = 0
+ )
+ )
})
test_that("inferRelatedness performs as expected", {
result <- inferRelatedness(0, aceA = .9, aceC = 0, sharedC = 0)
@@ -105,4 +107,3 @@ test_that("calculateH stops for illegal coefficients", {
"The observed correlations should be between -1 and 1"
)
})
-
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index e8dfe99e..475f7b6b 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -131,7 +131,8 @@ test_that("com2links correctly handles missing matrices", {
expect_error(
com2links(ad_ped_matrix = NULL, mit_ped_matrix = NULL, cn_ped_matrix = NULL),
- "At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided." )
+ "At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided."
+ )
expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.")
})
diff --git a/tests/testthat/test-readPedigrees.R b/tests/testthat/test-readPedigrees.R
index 1f098697..a6d6bdd0 100644
--- a/tests/testthat/test-readPedigrees.R
+++ b/tests/testthat/test-readPedigrees.R
@@ -214,10 +214,10 @@ test_that("readWikifamilytree reads a string correctly", {
# read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt
-#test_that("readWikifamilytree reads a file correctly", {
- # Create a temporary WikiFamilyTree file for testing
- # Example usage
+# test_that("readWikifamilytree reads a file correctly", {
+# Create a temporary WikiFamilyTree file for testing
+# Example usage
# family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc")
- # result <- readWikifamilytree(file_path=family_tree_file_path)
-#})
+# result <- readWikifamilytree(file_path=family_tree_file_path)
+# })
diff --git a/tests/testthat/test-summarizePedigrees.R b/tests/testthat/test-summarizePedigrees.R
index 61dae665..5bc1d6ea 100644
--- a/tests/testthat/test-summarizePedigrees.R
+++ b/tests/testthat/test-summarizePedigrees.R
@@ -53,9 +53,11 @@ test_that("summarizeMatrilines() works", {
nbiggest <- 2
df <- ped2fam(potter, famID = "newFamID", personID = "personID") %>%
ped2maternal(personID = "personID")
- df_summarized <- summarizeMatrilines(df, famID = "newFamID",
- personID = "personID",
- nbiggest = nbiggest)
+ df_summarized <- summarizeMatrilines(df,
+ famID = "newFamID",
+ personID = "personID",
+ nbiggest = nbiggest
+ )
# is the total count from the family summary the same as the raw data?
result_observed <- sum(df_summarized$maternal_summary$count)
result_expected <- nrow(potter)
@@ -76,9 +78,11 @@ test_that("summarizePatrilines() works", {
nbiggest <- 4
df <- ped2fam(potter, famID = "newFamID", personID = "personID") %>%
ped2paternal(personID = "personID")
- df_summarized <- summarizePatrilines(df, famID = "newFamID",
- personID = "personID",
- nbiggest = nbiggest)
+ df_summarized <- summarizePatrilines(df,
+ famID = "newFamID",
+ personID = "personID",
+ nbiggest = nbiggest
+ )
# is the total count from the family summary the same as the raw data?
result_observed <- sum(df_summarized$paternal_summary$count)
result_expected <- nrow(potter)
@@ -111,7 +115,7 @@ test_that("summarizePedigrees() handles missing values correctly", {
})
# Test Case 7: When all variables are skipped
-test_that("summarizePedigrees works when all numeric variables are skipped",{
+test_that("summarizePedigrees works when all numeric variables are skipped", {
df <- data.frame(
ID = 1:5,
momID = c(NA, 1, 1, NA, 4),
@@ -126,8 +130,10 @@ test_that("summarizePedigrees works when all numeric variables are skipped",{
# Test Case 8: Handling invalid column names
test_that("summarizePedigrees() throws error on invalid column names", {
- df <- data.frame(ID = 1:5, momID = c(NA, 1, 1, NA, 4),
- dadID = c(NA, 2, 2, NA, 5), famID = c(1, 1, 1, 2, 2))
+ df <- data.frame(
+ ID = 1:5, momID = c(NA, 1, 1, NA, 4),
+ dadID = c(NA, 2, 2, NA, 5), famID = c(1, 1, 1, 2, 2)
+ )
expect_error(summarizePedigrees(df, byr = "unknown_column"))
})
From e22c933ea0dcfb8053e4c434d03f9cf4f12163c8 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Tue, 8 Apr 2025 13:53:02 -0400
Subject: [PATCH 02/35] allow verbose argument to be passed to
standardizeColnames
---
NEWS.md | 1 +
R/checkIDs.R | 2 +-
R/checkParents.R | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
R/checkSex.R | 2 +-
R/cleanPedigree.R | 2 +-
R/plotPedigree.R | 2 +-
R/tweakPedigree.R | 4 ++--
7 files changed, 53 insertions(+), 7 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index d8bd3dd0..c5392a9d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
* added tests for checkParents function
* added GoT analysis
* reduced complexity of com2links and summarizePedigree with the use of subfunctions
+* allow verbose argument to be passed to standardizeColnames
# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
diff --git a/R/checkIDs.R b/R/checkIDs.R
index b9da9a6d..d528d56a 100644
--- a/R/checkIDs.R
+++ b/R/checkIDs.R
@@ -17,7 +17,7 @@
#' @export
checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
# Standardize column names in the input dataframe
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
# Initialize a list to store validation results
validation_results <- list()
diff --git a/R/checkParents.R b/R/checkParents.R
index eab8e0ca..c37a08e9 100644
--- a/R/checkParents.R
+++ b/R/checkParents.R
@@ -26,7 +26,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
parentswithoutrow = repair) {
# Standardize column names in the input dataframe
ped_og <- ped
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
# Initialize a list to store validation results
@@ -300,3 +300,48 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
repairParentIDs <- function(ped, verbose = FALSE) {
checkParentIDs(ped = ped, verbose = verbose, repair = TRUE)
}
+
+#' Add Phantom Parents
+#'
+#' This function adds phantom parents to a pedigree.
+#' @inheritParams checkParentIDs
+
+addPhantoms <- function(ped, verbose, pid, validation_results) {
+ # Add parents who appear in momID or dadID but are missing from ID
+ new_entries <- data.frame()
+
+ listed_parents <- unique(c(ped$momID, ped$dadID))
+ listed_parents <- listed_parents[!is.na(listed_parents)]
+
+ existing_ids <- ped$ID
+ missing_parents <- setdiff(listed_parents, existing_ids)
+
+ if (length(missing_parents) > 0) {
+ if (verbose) {
+ cat("Adding parents who were listed in momID/dadID but missing from ID:\n")
+ print(missing_parents)
+ }
+
+ for (pid in missing_parents) {
+ role <- unique(
+ c(
+ if (pid %in% ped$momID) "mom" else NULL,
+ if (pid %in% ped$dadID) "dad" else NULL
+ )
+ )
+ inferred_sex <- if ("mom" %in% role) validation_results$female_var else validation_results$male_var
+
+ new_row <- ped[1, ]
+ new_row$ID <- pid
+ new_row$dadID <- NA
+ new_row$momID <- NA
+ new_row$sex <- inferred_sex
+ new_entries <- rbind(new_entries, new_row)
+ }
+ }
+ ped <- merge(ped, new_entries, all = TRUE)
+ if (verbose) {
+ cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n")
+ }
+ return(ped)
+}
diff --git a/R/checkSex.R b/R/checkSex.R
index d220dec6..8fbabf6a 100644
--- a/R/checkSex.R
+++ b/R/checkSex.R
@@ -37,7 +37,7 @@
#'
checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE) {
# Standardize column names in the input dataframe
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
# TO DO: bypass the rest of the function if recode_only is TRUE
diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R
index ffd8054e..b03ca270 100644
--- a/R/cleanPedigree.R
+++ b/R/cleanPedigree.R
@@ -60,7 +60,7 @@ standardizeColnames <- function(df, verbose = FALSE) {
# check_sex = TRUE,
# check_parents = TRUE,
# verbose = FALSE) {
-# corrected_ped <- ped <- standardizeColnames(ped)
+# corrected_ped <- ped <- standardizeColnames(ped, verbose = verbose)
# if (verbose) {
# print("Repairing pedigree...")
# }
diff --git a/R/plotPedigree.R b/R/plotPedigree.R
index 051610e6..5b2229cd 100644
--- a/R/plotPedigree.R
+++ b/R/plotPedigree.R
@@ -25,7 +25,7 @@ plotPedigree <- function(ped,
pconnect = .5,
...) {
# Standardize column names in the input dataframe
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
# Define required columns
simulated_vars <- c("fam", "ID", "dadID", "momID", "sex")
diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R
index 2367fe21..edefaf99 100644
--- a/R/tweakPedigree.R
+++ b/R/tweakPedigree.R
@@ -18,7 +18,7 @@ makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_t
"fam", "ID", "gen",
"dadID", "momID", "spID", "sex"
), collapse = "")) {
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
if (verbose) {
cat("The input pedigree is not in the same format as the output of simulatePedigree\n")
}
@@ -131,7 +131,7 @@ makeInbreeding <- function(ped,
c("fam", "ID", "gen", "dadID", "momID", "spID", "sex"),
collapse = ""
)) {
- ped <- standardizeColnames(ped)
+ ped <- standardizeColnames(ped, verbose = verbose)
if (verbose) {
cat("The input pedigree is not in the same format as the output of simulatePedigree\n")
}
From a8c2b6c8781bcf4a37858c12660247dd221a1233 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Tue, 8 Apr 2025 14:54:40 -0400
Subject: [PATCH 03/35] refactor checkids
---
R/checkIDs.R | 151 ++++++++++++++++++++----------------------
R/summarizePedigree.R | 62 ++++++++++-------
2 files changed, 109 insertions(+), 104 deletions(-)
diff --git a/R/checkIDs.R b/R/checkIDs.R
index d528d56a..c92653ed 100644
--- a/R/checkIDs.R
+++ b/R/checkIDs.R
@@ -28,91 +28,15 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
}
# Identify non-unique IDs
- duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)]
-
+ id_check <- checkIDuniqueness(ped=ped, verbose=verbose)
- # Update the validation_results list
- if (length(duplicated_ids) > 0) {
- if (verbose) {
- cat(paste0(length(duplicated_ids), " non-unique IDs found.\n"))
- }
- validation_results$all_unique_ids <- FALSE
- validation_results$total_non_unique_ids <- length(duplicated_ids)
- validation_results$non_unique_ids <- unique(duplicated_ids)
- } else {
- if (verbose) {
- cat("All IDs are unique.\n")
- }
- validation_results$all_unique_ids <- TRUE
- validation_results$total_non_unique_ids <- 0
- validation_results$non_unique_ids <- NULL
- }
if (verbose) {
cat("Step 2: Checking for within row duplicats...\n")
- cat("Is own father?\n")
- }
- is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)]
- if (verbose) {
- cat("Is own mother?\n")
- }
- is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)]
- if (verbose) {
- cat("Is mother father?\n")
}
- duplicated_parents <- ped$ID[ped$dadID == ped$momID & !is.na(ped$dadID) & !is.na(ped$momID)]
+ row_check <- checkWithinRowDuplicates(ped=ped, verbose = verbose)
- # get the total number of within row duplicates
- validation_results$total_own_father <- length(is_own_father)
- validation_results$total_own_mother <- length(is_own_mother)
- validation_results$total_duplicated_parents <- length(duplicated_parents)
- validation_results$total_within_row_duplicates <- sum(length(is_own_father), length(is_own_mother), length(duplicated_parents))
- # Update the validation_results list
+ validation_results <- c(id_check, row_check)
- if (validation_results$total_within_row_duplicates > 0) {
- if (verbose) {
- cat(paste0(
- validation_results$total_within_row_duplicates,
- " within row duplicates found.\n"
- ))
- }
- validation_results$within_row_duplicates <- TRUE
- if (validation_results$total_own_father > 0) {
- validation_results$is_own_father_ids <- unique(is_own_father)
- if (verbose) {
- cat(paste0(
- validation_results$total_own_father,
- " individuals are their own fathers.\n"
- ))
- }
- }
- if (validation_results$total_own_mother > 0) {
- validation_results$is_own_mother_ids <- unique(is_own_mother)
- if (verbose) {
- cat(paste0(
- validation_results$total_own_mother,
- " individuals are their own mothers.\n"
- ))
- }
- }
- if (validation_results$total_duplicated_parents > 0) {
- validation_results$duplicated_parents_ids <- unique(duplicated_parents)
- if (verbose) {
- cat(paste0(
- validation_results$total_duplicated_parents,
- " individuals have the same mother and father.\n"
- ))
- }
- }
- } else {
- if (verbose) {
- cat("No within row duplicates found.\n")
- }
- validation_results$within_row_duplicates <- FALSE
- validation_results$total_within_row_duplicates <- 0
- validation_results$is_own_father_ids <- NULL
- validation_results$is_own_mother_ids <- NULL
- validation_results$duplicated_parents_ids <- NULL
- }
if (verbose) {
cat("Validation Results:\n")
print(validation_results)
@@ -169,3 +93,72 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
repairIDs <- function(ped, verbose = FALSE) {
checkIDs(ped = ped, verbose = verbose, repair = TRUE)
}
+
+#' Check for duplicated individual IDs
+#'
+#' This function checks for duplicated individual IDs in a pedigree.
+#'
+#' @param ped A pedigree object
+#' @param verbose A logical indicating whether to print progress messages
+#' @return A list containing the results of the check
+#'
+checkIDuniqueness <- function(ped, verbose = FALSE) {
+ # Identify non-unique IDs
+
+ duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)]
+
+ if (verbose) {
+ if (length(duplicated_ids) > 0) {
+ cat(length(duplicated_ids), " non-unique IDs found.\n")
+ } else {
+ cat("All IDs are unique.\n")
+ }
+ }
+
+ # Update the validation_results list
+ list(
+ all_unique_ids = length(duplicated_ids) == 0,
+ total_non_unique_ids = length(duplicated_ids),
+ non_unique_ids = if (length(duplicated_ids) > 0) unique(duplicated_ids) else NULL
+ )
+}
+
+
+#' Check for within-row duplicates (self-parents, same mom/dad)
+checkWithinRowDuplicates <- function(ped, verbose = FALSE) {
+ # is the individual their own father or mother?
+ is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)]
+ is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)]
+
+ # is mother and father the same?
+ duplicated_parents <- ped$ID[
+ ped$dadID == ped$momID &
+ !is.na(ped$dadID) & !is.na(ped$momID)
+ ]
+
+ # get the total number of within row duplicates
+ total <- length(is_own_father) + length(is_own_mother) + length(duplicated_parents)
+
+ if (verbose) {
+ if (total > 0) {
+ cat(total, " within row duplicates found.\n")
+ if (length(is_own_father) > 0) cat(length(is_own_father), " individuals are their own fathers.\n")
+ if (length(is_own_mother) > 0) cat(length(is_own_mother), " individuals are their own mothers.\n")
+ if (length(duplicated_parents) > 0) cat(length(duplicated_parents), " individuals have the same mother and father.\n")
+ } else {
+ cat("No within row duplicates found.\n")
+ }
+ }
+ # Update the validation_results list
+ list(
+ total_own_father = length(is_own_father),
+ total_own_mother = length(is_own_mother),
+ total_duplicated_parents = length(duplicated_parents),
+ total_within_row_duplicates = total,
+ within_row_duplicates = total > 0,
+ is_own_father_ids = if (length(is_own_father) > 0) unique(is_own_father) else NULL,
+ is_own_mother_ids = if (length(is_own_mother) > 0) unique(is_own_mother) else NULL,
+ duplicated_parents_ids = if (length(duplicated_parents) > 0) unique(duplicated_parents) else NULL
+ )
+}
+
diff --git a/R/summarizePedigree.R b/R/summarizePedigree.R
index 6c6564f3..ea8f81e8 100644
--- a/R/summarizePedigree.R
+++ b/R/summarizePedigree.R
@@ -39,7 +39,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
matID = "matID", patID = "patID",
type = c("fathers", "mothers", "families"),
byr = NULL, include_founder = FALSE, founder_sort_var = NULL,
- nbiggest = 5, noldest = 5, skip_var = NULL,
+ nbiggest = 5, noldest = nbiggest, skip_var = NULL,
five_num_summary = FALSE, network_checks = FALSE,
verbose = FALSE) {
# Fast Fails
@@ -123,15 +123,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
)
# Find the originating member for each line
if (include_founder) {
- if (verbose) message("Finding originating members for families...")
- originating_member_family <- findFounder(ped_dt,
+ family_summary_dt <- summarizeFounder(
+ verbose = verbose, ped_dt = ped_dt,
group_var = famID,
- sort_var = founder_sort_var
- )
- # Merge summary statistics with originating members for additional information
- family_summary_dt <- merge(family_summary_dt,
- originating_member_family,
- by = famID, suffixes = c("", "_founder")
+ sort_var = founder_sort_var,
+ foo_summary_dt = family_summary_dt
)
}
output$family_summary <- family_summary_dt
@@ -146,14 +142,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
five_num_summary = five_num_summary
)
if (include_founder) {
- if (verbose) message("Finding originating members for matrilineal lines...")
- originating_member_maternal <- findFounder(ped_dt,
+ maternal_summary_dt <- summarizeFounder(
+ verbose = verbose, ped_dt = ped_dt,
group_var = matID,
- sort_var = founder_sort_var
- )
- maternal_summary_dt <- merge(maternal_summary_dt,
- originating_member_maternal,
- by = matID, suffixes = c("", "_founder")
+ sort_var = founder_sort_var,
+ foo_summary_dt = maternal_summary_dt
)
}
output$maternal_summary <- maternal_summary_dt
@@ -167,14 +160,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
five_num_summary = five_num_summary
)
if (include_founder) {
- if (verbose) message("Finding originating members for patrilineal lines...")
- originating_member_paternal <- findFounder(ped_dt,
+ paternal_summary_dt <- summarizeFounder(
+ verbose = verbose, ped_dt = ped_dt,
group_var = patID,
- sort_var = founder_sort_var
- )
- paternal_summary_dt <- merge(paternal_summary_dt,
- originating_member_paternal,
- by = patID, suffixes = c("", "_founder")
+ sort_var = founder_sort_var,
+ foo_summary_dt = paternal_summary_dt
)
}
@@ -298,8 +288,8 @@ calculateSummaryDT <- function(data, group_var, skip_var,
return(summary_stats)
}
-# Function to find the originating member for each line
-
+#' Function to find the originating member for each line
+#'
#' This function finds the originating member for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
#' @inheritParams summarizePedigrees
#' @param sort_var A character string specifying the column name to sort by.
@@ -312,7 +302,29 @@ findFounder <- function(data, group_var, sort_var) {
data[order(get(sort_var)), .SD[1], by = group_var]
}
+#' Function to summarize the originating members for each line
+#'
+#' This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
+#'
+#' @inheritParams summarizePedigrees
+#' @inheritParams findFounder
+#'
+#' @keywords internal
+summarizeFounder <- function(ped_dt, group_var, sort_var, foo_summary_dt, verbose) {
+ if (verbose) message(paste0("Finding originating members for ", "group_var"))
+ originating_member_foo <- findFounder(
+ data = ped_dt,
+ group_var = group_var,
+ sort_var = sort_var
+ )
+ # Merge summary statistics with originating members for additional information
+ foo_summary_dt <- merge(foo_summary_dt,
+ originating_member_foo,
+ by = group_var, suffixes = c("", "_founder")
+ )
+ return(foo_summary_dt)
+}
#' Summarize the maternal lines in a pedigree
#' @inheritParams summarizePedigrees
#' @seealso [summarizePedigrees ()]
From 3326f757fbe4fa9e5731fa74972096a1e842842c Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Tue, 8 Apr 2025 14:57:25 -0400
Subject: [PATCH 04/35] docs
---
NEWS.md | 3 ++-
man/addPhantoms.Rd | 16 ++++++++++++++++
man/checkIDuniqueness.Rd | 19 +++++++++++++++++++
man/checkWithinRowDuplicates.Rd | 11 +++++++++++
man/findFounder.Rd | 2 +-
man/summarizeFounder.Rd | 17 +++++++++++++++++
man/summarizePedigrees.Rd | 2 +-
7 files changed, 67 insertions(+), 3 deletions(-)
create mode 100644 man/addPhantoms.Rd
create mode 100644 man/checkIDuniqueness.Rd
create mode 100644 man/checkWithinRowDuplicates.Rd
create mode 100644 man/summarizeFounder.Rd
diff --git a/NEWS.md b/NEWS.md
index c5392a9d..31ad40c0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,9 +2,10 @@
* revived checkParents function to check for handling phantom parents and missing parents
* added tests for checkParents function
* added GoT analysis
-* reduced complexity of com2links and summarizePedigree with the use of subfunctions
+* reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions
* allow verbose argument to be passed to standardizeColnames
+
# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
diff --git a/man/addPhantoms.Rd b/man/addPhantoms.Rd
new file mode 100644
index 00000000..2981b01e
--- /dev/null
+++ b/man/addPhantoms.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/checkParents.R
+\name{addPhantoms}
+\alias{addPhantoms}
+\title{Add Phantom Parents}
+\usage{
+addPhantoms(ped, verbose, pid, validation_results)
+}
+\arguments{
+\item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.}
+
+\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.}
+}
+\description{
+This function adds phantom parents to a pedigree.
+}
diff --git a/man/checkIDuniqueness.Rd b/man/checkIDuniqueness.Rd
new file mode 100644
index 00000000..88b4dba3
--- /dev/null
+++ b/man/checkIDuniqueness.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/checkIDs.R
+\name{checkIDuniqueness}
+\alias{checkIDuniqueness}
+\title{Check for duplicated individual IDs}
+\usage{
+checkIDuniqueness(ped, verbose = FALSE)
+}
+\arguments{
+\item{ped}{A pedigree object}
+
+\item{verbose}{A logical indicating whether to print progress messages}
+}
+\value{
+A list containing the results of the check
+}
+\description{
+This function checks for duplicated individual IDs in a pedigree.
+}
diff --git a/man/checkWithinRowDuplicates.Rd b/man/checkWithinRowDuplicates.Rd
new file mode 100644
index 00000000..e0ddb318
--- /dev/null
+++ b/man/checkWithinRowDuplicates.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/checkIDs.R
+\name{checkWithinRowDuplicates}
+\alias{checkWithinRowDuplicates}
+\title{Check for within-row duplicates (self-parents, same mom/dad)}
+\usage{
+checkWithinRowDuplicates(ped, verbose = FALSE)
+}
+\description{
+Check for within-row duplicates (self-parents, same mom/dad)
+}
diff --git a/man/findFounder.Rd b/man/findFounder.Rd
index a4cf70bb..19bfcb1f 100644
--- a/man/findFounder.Rd
+++ b/man/findFounder.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/summarizePedigree.R
\name{findFounder}
\alias{findFounder}
-\title{This function finds the originating member for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.}
+\title{Function to find the originating member for each line}
\usage{
findFounder(data, group_var, sort_var)
}
diff --git a/man/summarizeFounder.Rd b/man/summarizeFounder.Rd
new file mode 100644
index 00000000..703834e3
--- /dev/null
+++ b/man/summarizeFounder.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summarizePedigree.R
+\name{summarizeFounder}
+\alias{summarizeFounder}
+\title{Function to summarize the originating members for each line}
+\usage{
+summarizeFounder(ped_dt, group_var, sort_var, foo_summary_dt, verbose)
+}
+\arguments{
+\item{sort_var}{A character string specifying the column name to sort by.}
+
+\item{verbose}{Logical, if TRUE, print progress messages.}
+}
+\description{
+This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.
+}
+\keyword{internal}
diff --git a/man/summarizePedigrees.Rd b/man/summarizePedigrees.Rd
index b298d326..3ed2f0a5 100644
--- a/man/summarizePedigrees.Rd
+++ b/man/summarizePedigrees.Rd
@@ -17,7 +17,7 @@ summarizePedigrees(
include_founder = FALSE,
founder_sort_var = NULL,
nbiggest = 5,
- noldest = 5,
+ noldest = nbiggest,
skip_var = NULL,
five_num_summary = FALSE,
network_checks = FALSE,
From 6019916322c00d8227bfbdffada562fe09cadea7 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Wed, 9 Apr 2025 14:37:32 -0400
Subject: [PATCH 05/35] documentation
---
NEWS.md | 2 +-
R/checkIDs.R | 13 +++++--
R/checkParents.R | 3 +-
R/computeRelatedness.R | 16 ++++++++
R/helpGeneric.R | 68 +--------------------------------
R/simulatePedigree.R | 8 ++++
man/SimPed.Rd | 33 ----------------
man/addPhantoms.Rd | 4 +-
man/calculateRelatedness.Rd | 3 ++
man/checkIDuniqueness.Rd | 4 +-
man/checkWithinRowDuplicates.Rd | 10 ++++-
man/inferRelatedness.Rd | 5 +++
man/related_coef.Rd | 33 ----------------
man/relatedness.Rd | 33 ----------------
man/repairIDs.Rd | 4 +-
man/simulatePedigree.Rd | 5 +++
16 files changed, 67 insertions(+), 177 deletions(-)
delete mode 100644 man/SimPed.Rd
delete mode 100644 man/related_coef.Rd
delete mode 100644 man/relatedness.Rd
diff --git a/NEWS.md b/NEWS.md
index 31ad40c0..94602bd7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@
* added GoT analysis
* reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions
* allow verbose argument to be passed to standardizeColnames
-
+* list SimPed and related_coef as aliases for functions
# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
diff --git a/R/checkIDs.R b/R/checkIDs.R
index c92653ed..4c1100fd 100644
--- a/R/checkIDs.R
+++ b/R/checkIDs.R
@@ -87,8 +87,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
#' Repair Missing IDs
#'
#' This function repairs missing IDs in a pedigree.
-#' @param ped A pedigree object
-#' @param verbose A logical indicating whether to print progress messages
+#' @inheritParams checkIDs
#' @return A corrected pedigree
repairIDs <- function(ped, verbose = FALSE) {
checkIDs(ped = ped, verbose = verbose, repair = TRUE)
@@ -98,8 +97,7 @@ repairIDs <- function(ped, verbose = FALSE) {
#'
#' This function checks for duplicated individual IDs in a pedigree.
#'
-#' @param ped A pedigree object
-#' @param verbose A logical indicating whether to print progress messages
+#' @inheritParams checkIDs
#' @return A list containing the results of the check
#'
checkIDuniqueness <- function(ped, verbose = FALSE) {
@@ -124,7 +122,14 @@ checkIDuniqueness <- function(ped, verbose = FALSE) {
}
+
#' Check for within-row duplicates (self-parents, same mom/dad)
+#'
+#' This function checks for within-row duplicates in a pedigree.
+#'
+#' @inheritParams checkIDs
+#' @return A list containing the results of the check
+#'
checkWithinRowDuplicates <- function(ped, verbose = FALSE) {
# is the individual their own father or mother?
is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)]
diff --git a/R/checkParents.R b/R/checkParents.R
index c37a08e9..3c680e77 100644
--- a/R/checkParents.R
+++ b/R/checkParents.R
@@ -305,8 +305,9 @@ repairParentIDs <- function(ped, verbose = FALSE) {
#'
#' This function adds phantom parents to a pedigree.
#' @inheritParams checkParentIDs
+#' @param validation_results validation results
-addPhantoms <- function(ped, verbose, pid, validation_results) {
+addPhantoms <- function(ped, verbose, validation_results) {
# Add parents who appear in momID or dadID but are missing from ID
new_entries <- data.frame()
diff --git a/R/computeRelatedness.R b/R/computeRelatedness.R
index e3f53b8b..3fa91ffd 100644
--- a/R/computeRelatedness.R
+++ b/R/computeRelatedness.R
@@ -64,6 +64,14 @@ calculateRelatedness <- function(
return(coef)
}
+
+#' @rdname calculateRelatedness
+#' @export
+related_coef <- function(...) {
+ warning("The 'related_coef' function is deprecated. Please use 'calculateRelatedness' instead.")
+ calculateRelatedness(...)
+}
+
#' Infer Relatedness Coefficient
#'
#' @description
@@ -79,6 +87,7 @@ calculateRelatedness <- function(
#' @param aceA Numeric. Proportion of variance attributable to additive genetic variance. Must be between 0 and 1. Default is 0.9.
#' @param aceC Numeric. Proportion of variance attributable to shared environmental variance. Must be between 0 and 1. Default is 0.
#' @param sharedC Numeric. Proportion of shared environment shared between the two individuals. Must be between 0 (no shared environment) and 1 (completely shared environment). Default is 0.
+#' @param ... Further named arguments that may be passed to another function.
#'
#' @return
#' Numeric. The calculated relatedness coefficient (`est_r`).
@@ -97,6 +106,13 @@ inferRelatedness <- function(obsR, aceA = .9, aceC = 0, sharedC = 0) {
return(calc_r)
}
+#' @rdname inferRelatedness
+#' @export
+relatedness <- function(...) {
+ warning("The 'relatedness' function is deprecated. Please use 'inferRelatedness' instead.")
+ inferRelatedness(...)
+}
+
#' Falconer's Formula
#'
#' @description
diff --git a/R/helpGeneric.R b/R/helpGeneric.R
index e8ef381c..8b376bfb 100644
--- a/R/helpGeneric.R
+++ b/R/helpGeneric.R
@@ -99,71 +99,7 @@ resample <- function(x, ...) {
}
-#' SimPed (Deprecated)
-#'
-#' This function is a wrapper around the new `simulatePedigree` function.
-#' `SimPed` has been deprecated, and it's advised to use `simulatePedigree` directly.
-#'
-#' @param ... Arguments to be passed to `simulatePedigree`.
-#' @return The same result as calling `simulatePedigree`.
-#' @seealso \code{\link{simulatePedigree}} for the updated function.
-#' @description When calling this function, a warning will be issued about its deprecation.
-#' @keywords deprecated
-#' @examples
-#' \dontrun{
-#' # This is an example of the deprecated function:
-#' SimPed(...)
-#' # It is recommended to use:
-#' simulatePedigree(...)
-#' }
-#' @export
-SimPed <- function(...) { # nolint: object_name_linter.
- warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.")
- simulatePedigree(...)
-}
-#' related_coef (Deprecated)
-#'
-#' This function is a wrapper around the new `calculateRelatedness` function.
-#' `related_coef` has been deprecated, and it's advised to use `calculateRelatedness` directly.
-#'
-#' @param ... Arguments to be passed to `calculateRelatedness`.
-#' @return The same result as calling `calculateRelatedness`.
-#' @seealso \code{\link{calculateRelatedness}} for the updated function.
-#' @description When calling this function, a warning will be issued about its deprecation.
-#' @keywords deprecated
-#' @examples
-#' \dontrun{
-#' # This is an example of the deprecated function:
-#' related_coef(...)
-#' # It is recommended to use:
-#' calculateRelatedness(...)
-#' }
-#' @export
-related_coef <- function(...) {
- warning("The 'related_coef' function is deprecated. Please use 'calculateRelatedness' instead.")
- calculateRelatedness(...)
-}
-#' relatedness (Deprecated)
-#'
-#' This function is a wrapper around the new `inferRelatedness` function.
-#' `relatedness` has been deprecated, and it's advised to use `inferRelatedness` directly.
-#'
-#' @param ... Arguments to be passed to `inferRelatedness`.
-#' @return The same result as calling `inferRelatedness`.
-#' @seealso \code{\link{inferRelatedness}} for the updated function.
-#' @description When calling this function, a warning will be issued about its deprecation.
-#' @keywords deprecated
-#' @examples
-#' \dontrun{
-#' # This is an example of the deprecated function:
-#' relatedness(...)
-#' # It is recommended to use:
-#' inferRelatedness(...)
-#' }
-#' @export
-relatedness <- function(...) {
- warning("The 'relatedness' function is deprecated. Please use 'inferRelatedness' instead.")
- inferRelatedness(...)
-}
+
+
diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index d50b7732..29407af8 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -343,6 +343,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR,
#' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version.
#' @param balancedMar Not fully developed yet. Always \code{TRUE} in the current version.
#' @param verbose logical If TRUE, print progress through stages of algorithm
+#' @param ... Additional arguments to be passed to other functions.
#' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
#' \itemize{
@@ -408,3 +409,10 @@ simulatePedigree <- function(kpc = 3,
# print(df_Fam)
return(df_Fam)
}
+
+#' @rdname simulatePedigree
+#' @export
+SimPed <- function(...) { # nolint: object_name_linter.
+ warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.")
+ simulatePedigree(...)
+}
diff --git a/man/SimPed.Rd b/man/SimPed.Rd
deleted file mode 100644
index 79dd1f96..00000000
--- a/man/SimPed.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helpGeneric.R
-\name{SimPed}
-\alias{SimPed}
-\title{SimPed (Deprecated)}
-\usage{
-SimPed(...)
-}
-\arguments{
-\item{...}{Arguments to be passed to `simulatePedigree`.}
-}
-\value{
-The same result as calling `simulatePedigree`.
-}
-\description{
-When calling this function, a warning will be issued about its deprecation.
-}
-\details{
-This function is a wrapper around the new `simulatePedigree` function.
-`SimPed` has been deprecated, and it's advised to use `simulatePedigree` directly.
-}
-\examples{
-\dontrun{
-# This is an example of the deprecated function:
-SimPed(...)
-# It is recommended to use:
-simulatePedigree(...)
-}
-}
-\seealso{
-\code{\link{simulatePedigree}} for the updated function.
-}
-\keyword{deprecated}
diff --git a/man/addPhantoms.Rd b/man/addPhantoms.Rd
index 2981b01e..bafe5b75 100644
--- a/man/addPhantoms.Rd
+++ b/man/addPhantoms.Rd
@@ -4,12 +4,14 @@
\alias{addPhantoms}
\title{Add Phantom Parents}
\usage{
-addPhantoms(ped, verbose, pid, validation_results)
+addPhantoms(ped, verbose, validation_results)
}
\arguments{
\item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.}
\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.}
+
+\item{validation_results}{validation results}
}
\description{
This function adds phantom parents to a pedigree.
diff --git a/man/calculateRelatedness.Rd b/man/calculateRelatedness.Rd
index b9a8f067..6d932e63 100644
--- a/man/calculateRelatedness.Rd
+++ b/man/calculateRelatedness.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/computeRelatedness.R
\name{calculateRelatedness}
\alias{calculateRelatedness}
+\alias{related_coef}
\title{Calculate Relatedness Coefficient}
\usage{
calculateRelatedness(
@@ -18,6 +19,8 @@ calculateRelatedness(
denom_m = FALSE,
...
)
+
+related_coef(...)
}
\arguments{
\item{generations}{Number of generations back of common ancestors the pair share.}
diff --git a/man/checkIDuniqueness.Rd b/man/checkIDuniqueness.Rd
index 88b4dba3..6c1aebf6 100644
--- a/man/checkIDuniqueness.Rd
+++ b/man/checkIDuniqueness.Rd
@@ -7,9 +7,9 @@
checkIDuniqueness(ped, verbose = FALSE)
}
\arguments{
-\item{ped}{A pedigree object}
+\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.}
-\item{verbose}{A logical indicating whether to print progress messages}
+\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.}
}
\value{
A list containing the results of the check
diff --git a/man/checkWithinRowDuplicates.Rd b/man/checkWithinRowDuplicates.Rd
index e0ddb318..4797306c 100644
--- a/man/checkWithinRowDuplicates.Rd
+++ b/man/checkWithinRowDuplicates.Rd
@@ -6,6 +6,14 @@
\usage{
checkWithinRowDuplicates(ped, verbose = FALSE)
}
+\arguments{
+\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.}
+
+\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.}
+}
+\value{
+A list containing the results of the check
+}
\description{
-Check for within-row duplicates (self-parents, same mom/dad)
+This function checks for within-row duplicates in a pedigree.
}
diff --git a/man/inferRelatedness.Rd b/man/inferRelatedness.Rd
index dba7a0ed..1dd8b17c 100644
--- a/man/inferRelatedness.Rd
+++ b/man/inferRelatedness.Rd
@@ -2,9 +2,12 @@
% Please edit documentation in R/computeRelatedness.R
\name{inferRelatedness}
\alias{inferRelatedness}
+\alias{relatedness}
\title{Infer Relatedness Coefficient}
\usage{
inferRelatedness(obsR, aceA = 0.9, aceC = 0, sharedC = 0)
+
+relatedness(...)
}
\arguments{
\item{obsR}{Numeric. Observed correlation between the two groups. Must be between -1 and 1.}
@@ -14,6 +17,8 @@ inferRelatedness(obsR, aceA = 0.9, aceC = 0, sharedC = 0)
\item{aceC}{Numeric. Proportion of variance attributable to shared environmental variance. Must be between 0 and 1. Default is 0.}
\item{sharedC}{Numeric. Proportion of shared environment shared between the two individuals. Must be between 0 (no shared environment) and 1 (completely shared environment). Default is 0.}
+
+\item{...}{Further named arguments that may be passed to another function.}
}
\value{
Numeric. The calculated relatedness coefficient (`est_r`).
diff --git a/man/related_coef.Rd b/man/related_coef.Rd
deleted file mode 100644
index 11112982..00000000
--- a/man/related_coef.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helpGeneric.R
-\name{related_coef}
-\alias{related_coef}
-\title{related_coef (Deprecated)}
-\usage{
-related_coef(...)
-}
-\arguments{
-\item{...}{Arguments to be passed to `calculateRelatedness`.}
-}
-\value{
-The same result as calling `calculateRelatedness`.
-}
-\description{
-When calling this function, a warning will be issued about its deprecation.
-}
-\details{
-This function is a wrapper around the new `calculateRelatedness` function.
-`related_coef` has been deprecated, and it's advised to use `calculateRelatedness` directly.
-}
-\examples{
-\dontrun{
-# This is an example of the deprecated function:
-related_coef(...)
-# It is recommended to use:
-calculateRelatedness(...)
-}
-}
-\seealso{
-\code{\link{calculateRelatedness}} for the updated function.
-}
-\keyword{deprecated}
diff --git a/man/relatedness.Rd b/man/relatedness.Rd
deleted file mode 100644
index f84740df..00000000
--- a/man/relatedness.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helpGeneric.R
-\name{relatedness}
-\alias{relatedness}
-\title{relatedness (Deprecated)}
-\usage{
-relatedness(...)
-}
-\arguments{
-\item{...}{Arguments to be passed to `inferRelatedness`.}
-}
-\value{
-The same result as calling `inferRelatedness`.
-}
-\description{
-When calling this function, a warning will be issued about its deprecation.
-}
-\details{
-This function is a wrapper around the new `inferRelatedness` function.
-`relatedness` has been deprecated, and it's advised to use `inferRelatedness` directly.
-}
-\examples{
-\dontrun{
-# This is an example of the deprecated function:
-relatedness(...)
-# It is recommended to use:
-inferRelatedness(...)
-}
-}
-\seealso{
-\code{\link{inferRelatedness}} for the updated function.
-}
-\keyword{deprecated}
diff --git a/man/repairIDs.Rd b/man/repairIDs.Rd
index 2ce49f7a..cc37d78f 100644
--- a/man/repairIDs.Rd
+++ b/man/repairIDs.Rd
@@ -7,9 +7,9 @@
repairIDs(ped, verbose = FALSE)
}
\arguments{
-\item{ped}{A pedigree object}
+\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.}
-\item{verbose}{A logical indicating whether to print progress messages}
+\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.}
}
\value{
A corrected pedigree
diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd
index a2e07b23..b7c28650 100644
--- a/man/simulatePedigree.Rd
+++ b/man/simulatePedigree.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/simulatePedigree.R
\name{simulatePedigree}
\alias{simulatePedigree}
+\alias{SimPed}
\title{Simulate Pedigrees
This function simulates "balanced" pedigrees based on a group of parameters:
1) k - Kids per couple;
@@ -19,6 +20,8 @@ simulatePedigree(
balancedMar = TRUE,
verbose = FALSE
)
+
+SimPed(...)
}
\arguments{
\item{kpc}{Number of kids per couple. An integer >= 2 that determines how many kids each fertilized mated couple will have in the pedigree. Default value is 3. Returns an error when kpc equals 1.}
@@ -36,6 +39,8 @@ simulatePedigree(
\item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the current version.}
\item{verbose}{logical If TRUE, print progress through stages of algorithm}
+
+\item{...}{Additional arguments to be passed to other functions.}
}
\value{
A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
From 7f249101de4e10259cf8bd28abbc1ede8e2def2c Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Wed, 9 Apr 2025 23:04:00 -0400
Subject: [PATCH 06/35] Updatedocs
---
.gitignore | 1 +
R/checkIDs.R | 5 +-
R/checkParents.R | 48 +-
R/helpGeneric.R | 6 -
R/makeLinks.R | 499 ++++++++++++++++++-
benchmark_results.csv | 4 +
data-raw/benchmark.R | 81 ++-
man/{addPhantoms.Rd => addRowlessParents.Rd} | 8 +-
tests/testthat/test-checkParents.R | 2 +-
tests/testthat/test-makeLinks.R | 90 ++++
10 files changed, 688 insertions(+), 56 deletions(-)
create mode 100644 benchmark_results.csv
rename man/{addPhantoms.Rd => addRowlessParents.Rd} (75%)
diff --git a/.gitignore b/.gitignore
index 8faabc57..dc07da73 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ ASOIAF.ged
.vscode/launch.json
dataRelatedPairs_new2.csv
data-raw/ASOIAF_040725.ged
+dataRelatedPairs.csv
diff --git a/R/checkIDs.R b/R/checkIDs.R
index 4c1100fd..b687f4c7 100644
--- a/R/checkIDs.R
+++ b/R/checkIDs.R
@@ -28,12 +28,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
}
# Identify non-unique IDs
- id_check <- checkIDuniqueness(ped=ped, verbose=verbose)
+ id_check <- checkIDuniqueness(ped = ped, verbose = verbose)
if (verbose) {
cat("Step 2: Checking for within row duplicats...\n")
}
- row_check <- checkWithinRowDuplicates(ped=ped, verbose = verbose)
+ row_check <- checkWithinRowDuplicates(ped = ped, verbose = verbose)
validation_results <- c(id_check, row_check)
@@ -166,4 +166,3 @@ checkWithinRowDuplicates <- function(ped, verbose = FALSE) {
duplicated_parents_ids = if (length(duplicated_parents) > 0) unique(duplicated_parents) else NULL
)
}
-
diff --git a/R/checkParents.R b/R/checkParents.R
index 3c680e77..b80669bf 100644
--- a/R/checkParents.R
+++ b/R/checkParents.R
@@ -250,39 +250,10 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
cat("Added phantom moms for:", paste(changes$phantom_moms_added, collapse = ", "), "\n")
}
}
- # add phantom parents
+ # add parents who appear in momID or dadID but are missing from ID
if (parentswithoutrow) {
# Add parents who appear in momID or dadID but are missing from ID
- listed_parents <- unique(c(ped$momID, ped$dadID))
- listed_parents <- listed_parents[!is.na(listed_parents)]
-
- existing_ids <- ped$ID
- missing_parents <- setdiff(listed_parents, existing_ids)
-
- if (length(missing_parents) > 0) {
- if (verbose) {
- cat("Adding parents who were listed in momID/dadID but missing from ID:\n")
- print(missing_parents)
- }
-
- for (pid in missing_parents) {
- role <- unique(
- c(
- if (pid %in% ped$momID) "mom" else NULL,
- if (pid %in% ped$dadID) "dad" else NULL
- )
- )
- inferred_sex <- if ("mom" %in% role) validation_results$female_var else validation_results$male_var
-
- new_row <- ped[1, ]
- new_row$ID <- pid
- new_row$dadID <- NA
- new_row$momID <- NA
- new_row$sex <- inferred_sex
- new_entries <- rbind(new_entries, new_row)
- }
- }
- ped <- merge(ped, new_entries, all = TRUE)
+ ped <- addRowlessParents(ped = ped, verbose = verbose, validation_results = validation_results)
}
if (verbose) {
@@ -301,13 +272,13 @@ repairParentIDs <- function(ped, verbose = FALSE) {
checkParentIDs(ped = ped, verbose = verbose, repair = TRUE)
}
-#' Add Phantom Parents
+#' Add addRowlessParents
#'
-#' This function adds phantom parents to a pedigree.
+#' This function adds parents who appear in momID or dadID but are missing from ID
#' @inheritParams checkParentIDs
#' @param validation_results validation results
-addPhantoms <- function(ped, verbose, validation_results) {
+addRowlessParents <- function(ped, verbose, validation_results) {
# Add parents who appear in momID or dadID but are missing from ID
new_entries <- data.frame()
@@ -339,10 +310,11 @@ addPhantoms <- function(ped, verbose, validation_results) {
new_row$sex <- inferred_sex
new_entries <- rbind(new_entries, new_row)
}
- }
- ped <- merge(ped, new_entries, all = TRUE)
- if (verbose) {
- cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n")
+
+ ped <- merge(ped, new_entries, all = TRUE)
+ if (verbose) {
+ cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n")
+ }
}
return(ped)
}
diff --git a/R/helpGeneric.R b/R/helpGeneric.R
index 8b376bfb..8dae2b2c 100644
--- a/R/helpGeneric.R
+++ b/R/helpGeneric.R
@@ -97,9 +97,3 @@ resample <- function(x, ...) {
}
x[sample.int(length(x), ...)]
}
-
-
-
-
-
-
diff --git a/R/makeLinks.R b/R/makeLinks.R
index d2dc6a65..930d3d2b 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -78,6 +78,9 @@ com2links <- function(
# Extract individual IDs from the first available matrix.
ids <- NULL
+
+
+
if (!is.null(cn_ped_matrix)) {
ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
nc <- ncol(cn_ped_matrix)
@@ -93,6 +96,15 @@ com2links <- function(
stop("Could not extract IDs from the provided matrices.")
}
+
+ # Construct case identifier
+ matrix_case <- paste(sort(c(
+ if (!is.null(ad_ped_matrix)) "ad" else NULL,
+ if (!is.null(mit_ped_matrix)) "mt" else NULL,
+ if (!is.null(cn_ped_matrix)) "cn" else NULL
+ )), collapse = "-")
+
+
# Count how many matrices are provided.
sum_nulls <- sum(!is.null(ad_ped_matrix),
!is.null(mit_ped_matrix),
@@ -100,7 +112,7 @@ com2links <- function(
na.rm = TRUE
)
if (verbose) {
- print(sum_nulls)
+ print(matrix_case)
}
# Extract the internal pointers (p, i, and x slots) for each provided matrix.
@@ -633,6 +645,491 @@ com2links.legacy <- function(
return(NULL)
}
+com2links.beta <- function(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ # pat_ped_matrix = NULL,
+ # mat_ped_matrix = NULL,
+ # mapa_id_file = "data_mapaID.csv",
+ write_buffer_size = 1000,
+ update_rate = 1000,
+ gc = TRUE,
+ writetodisk = TRUE,
+ verbose = FALSE,
+ legacy = FALSE,
+ outcome_name = "data",
+ drop_upper_triangular = TRUE,
+ ...) {
+ # --- Input Validations and Preprocessing ---
+
+ # Ensure that at least one relationship matrix is provided.
+ if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
+ stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
+ }
+ # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(ad_ped_matrix)) {
+ ad_ped_matrix <- validate_and_convert_matrix(
+ mat = ad_ped_matrix,
+ name = "ad_ped_matrix"
+ )
+ }
+
+ # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(cn_ped_matrix)) {
+ cn_ped_matrix <- validate_and_convert_matrix(
+ mat = cn_ped_matrix,
+ name = "cn_ped_matrix",
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # Validate and process mit_ped_matrix: convert and ensure binary values.
+ if (!is.null(mit_ped_matrix)) {
+ mit_ped_matrix <- validate_and_convert_matrix(
+ mat = mit_ped_matrix,
+ name = "mit_ped_matrix", force_binary = TRUE,
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # --- Build IDs and Prepare Matrix Pointers ---
+
+ # Extract individual IDs from the first available matrix.
+ ids <- NULL
+
+
+
+ if (!is.null(cn_ped_matrix)) {
+ ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
+ nc <- ncol(cn_ped_matrix)
+ } else if (!is.null(ad_ped_matrix)) {
+ ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
+ nc <- ncol(ad_ped_matrix)
+ } else if (!is.null(mit_ped_matrix)) {
+ ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
+ nc <- ncol(mit_ped_matrix)
+ }
+
+ if (is.null(ids)) {
+ stop("Could not extract IDs from the provided matrices.")
+ }
+
+ # --- matrix_case construction and switch dispatch ---
+ matrix_case <- paste(sort(c(
+ if (!is.null(ad_ped_matrix)) "ad" else NULL,
+ if (!is.null(mit_ped_matrix)) "mt" else NULL,
+ if (!is.null(cn_ped_matrix)) "cn" else NULL
+ )), collapse = "-")
+
+ if (verbose) {
+ print(matrix_case)
+ }
+
+ switch(matrix_case,
+ "ad" = process_one(
+ matrix = ad_ped_matrix,
+ rel_name = "addRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "mt" = process_one(
+ matrix = mit_ped_matrix,
+ rel_name = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn" = process_one(
+ matrix = cn_ped_matrix,
+ rel_name = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-mt" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = cn_ped_matrix,
+ name2 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn-mt" = process_two(
+ matrix1 = cn_ped_matrix,
+ name1 = "cnuRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn-mt" = process_all_three(
+ mat1 = ad_ped_matrix,
+ name1 = "addRel",
+ mat2 = mit_ped_matrix,
+ name2 = "mitRel",
+ mat3 = cn_ped_matrix,
+ name3 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ stop("Unsupported matrix combination")
+ )
+}
+
+process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) {
+ # Extract pointers and indices from the matrix.
+ newColPos <- matrix@p + 1L
+ iss <- matrix@i + 1L
+ x <- matrix@x
+
+ # Initialize the related pairs file with headers.
+ df_relpairs <- initialize_empty_df(relNames = rel_name)
+
+ if (writetodisk == TRUE) {
+ utils::write.table(
+ df_relpairs,
+ file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
+ )
+
+ # Prepare an empty buffer for batching writes.
+ write_buffer <- list()
+ remove(df_relpairs)
+ }
+
+ # Process each column in the matrix.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+
+ # Extract column indices
+ ncp <- newColPos[j]
+ ncpp <- newColPos[j + 1L]
+ cond <- ncp < ncpp
+ if (cond) {
+ vv <- ncp:(ncpp - 1L)
+ issvv <- iss[vv]
+ }
+
+ # Create a unique set of row indices.
+ u <- sort(issvv)
+
+ # If any relationships exist for this individual, build the related pairs.
+ if (cond) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[rel_name]] <- 0
+
+ if (cond) {
+ tds[u %in% issvv, rel_name] <- x[vv]
+ }
+ if (drop_upper_triangular == TRUE) {
+ tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
+ }
+
+ # Write the batch to disk or accumulate in the data frame.
+ if (nrow(tds) > 0) {
+ if (writetodisk == TRUE) {
+ write_buffer[[length(write_buffer) + 1]] <- tds
+
+ if (length(write_buffer) >= write_buffer_size) { # Write in batches
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+ # If not writing to disk, return the accumulated data frame.
+ if (writetodisk == FALSE) {
+ return(df_relpairs)
+ } else {
+ # Write any remaining buffered rows.
+ if (length(write_buffer) > 0) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+ }
+ if (gc == TRUE) {
+ remove(newColPos, iss, x)
+ }
+}
+
+process_all_three <- function(
+ mat1, name1,
+ mat2, name2,
+ mat3, name3,
+ ids, nc,
+ rel_pairs_file,
+ writetodisk,
+ write_buffer_size,
+ drop_upper_triangular,
+ update_rate,
+ verbose,
+ gc,
+ ...) {
+ # Extract matrix slots
+ p1 <- mat1@p + 1L
+ i1 <- mat1@i + 1L
+ x1 <- mat1@x
+ p2 <- mat2@p + 1L
+ i2 <- mat2@i + 1L
+ x2 <- mat2@x
+ p3 <- mat3@p + 1L
+ i3 <- mat3@i + 1L
+ x3 <- mat3@x
+
+ relNames <- c(name1, name2, name3)
+ df_relpairs <- initialize_empty_df(relNames)
+
+ if (writetodisk) {
+ utils::write.table(df_relpairs, file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE)
+ write_buffer <- list()
+ rm(df_relpairs)
+ }
+
+ for (j in seq_len(nc)) {
+ ID2 <- ids[j]
+
+ # Get index spans
+ v1 <- if (p1[j] < p1[j + 1L]) {
+ idx <- p1[j]:(p1[j + 1L] - 1L)
+ list(i = i1[idx], x = x1[idx])
+ } else {
+ NULL
+ }
+ v2 <- if (p2[j] < p2[j + 1L]) {
+ idx <- p2[j]:(p2[j + 1L] - 1L)
+ list(i = i2[idx], x = x2[idx])
+ } else {
+ NULL
+ }
+ v3 <- if (p3[j] < p3[j + 1L]) {
+ idx <- p3[j]:(p3[j + 1L] - 1L)
+ list(i = i3[idx], x = x3[idx])
+ } else {
+ NULL
+ }
+
+ # Union of index positions
+ u <- sort(unique(c(
+ if (!is.null(v1)) v1$i else NULL,
+ if (!is.null(v2)) v2$i else NULL,
+ if (!is.null(v3)) v3$i else NULL
+ )))
+ if (length(u) > 0) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[name1]] <- if (!is.null(v1)) ifelse(u %in% v1$i, v1$x[match(u, v1$i)], 0) else 0
+ tds[[name2]] <- if (!is.null(v2)) ifelse(u %in% v2$i, v2$x[match(u, v2$i)], 0) else 0
+ tds[[name3]] <- if (!is.null(v3)) ifelse(u %in% v3$i, v3$x[match(u, v3$i)], 0) else 0
+
+ if (drop_upper_triangular) {
+ tds <- tds[tds$ID1 <= tds$ID2, ]
+ }
+
+ if (nrow(tds) > 0) {
+ if (writetodisk) {
+ write_buffer[[length(write_buffer) + 1L]] <- tds
+ if (length(write_buffer) >= write_buffer_size) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+
+ if (!writetodisk) {
+ return(df_relpairs)
+ } else if (length(write_buffer) > 0) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+
+ invisible(NULL)
+}
+
+process_two <- function(
+ matrix1, name1,
+ matrix2, name2,
+ ids, nc,
+ rel_pairs_file,
+ writetodisk,
+ write_buffer_size,
+ drop_upper_triangular,
+ update_rate,
+ verbose,
+ gc,
+ ...) {
+ # Extract internal slots
+ p1 <- matrix1@p + 1L
+ i1 <- matrix1@i + 1L
+ x1 <- matrix1@x
+ p2 <- matrix2@p + 1L
+ i2 <- matrix2@i + 1L
+ x2 <- matrix2@x
+
+ relNames <- c(name1, name2)
+ df_relpairs <- initialize_empty_df(relNames)
+
+ if (writetodisk) {
+ utils::write.table(df_relpairs, file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE)
+ write_buffer <- list()
+ rm(df_relpairs)
+ }
+
+ for (j in seq_len(nc)) {
+ ID2 <- ids[j]
+
+ # Get index/value slices
+ v1 <- if (p1[j] < p1[j + 1L]) {
+ idx <- p1[j]:(p1[j + 1L] - 1L)
+ list(i = i1[idx], x = x1[idx])
+ } else {
+ NULL
+ }
+ v2 <- if (p2[j] < p2[j + 1L]) {
+ idx <- p2[j]:(p2[j + 1L] - 1L)
+ list(i = i2[idx], x = x2[idx])
+ } else {
+ NULL
+ }
+
+ # Union of indices from both matrices
+ u <- sort(unique(c(
+ if (!is.null(v1)) v1$i else NULL,
+ if (!is.null(v2)) v2$i else NULL
+ )))
+
+ if (length(u) > 0) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[name1]] <- if (!is.null(v1)) ifelse(u %in% v1$i, v1$x[match(u, v1$i)], 0) else 0
+ tds[[name2]] <- if (!is.null(v2)) ifelse(u %in% v2$i, v2$x[match(u, v2$i)], 0) else 0
+
+ if (drop_upper_triangular) {
+ tds <- tds[tds$ID1 <= tds$ID2, ]
+ }
+
+ if (nrow(tds) > 0) {
+ if (writetodisk) {
+ write_buffer[[length(write_buffer) + 1L]] <- tds
+ if (length(write_buffer) >= write_buffer_size) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+
+ if (!writetodisk) {
+ return(df_relpairs)
+ } else if (length(write_buffer) > 0) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+
+ invisible(NULL)
+}
+
+
#' @title validate_and_convert_matrix
#' @description
#' This function validates and converts a matrix to a specific format.
diff --git a/benchmark_results.csv b/benchmark_results.csv
new file mode 100644
index 00000000..efe84ed8
--- /dev/null
+++ b/benchmark_results.csv
@@ -0,0 +1,4 @@
+"expr","min","lq","mean","median","uq","max","neval"
+"beta",430.4392,516.5138,574.575919,563.7353,623.24015,956.2494,100
+"regular",424.8536,507.47875,578.514,564.8453,631.97425,997.8213,100
+"legacy",580.1082,1392.24995,2086.481802,1783.02635,2686.8328,4768.9119,100
diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R
index fe480cdf..536190b1 100644
--- a/data-raw/benchmark.R
+++ b/data-raw/benchmark.R
@@ -2,7 +2,7 @@ library(microbenchmark)
library(Matrix)
# library(BGmisc)
# data("hazard")
-
+library(tidyverse)
# make big data
set.seed(15)
@@ -10,8 +10,34 @@ Ngen <- 5
kpc <- 5
sexR <- .50
marR <- .7
-ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR)
-
+ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>%
+ mutate(
+ fam = "fam 1"
+ )
+set.seed(151)
+Ngen <- 5
+marR <- .8
+ped2 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>%
+ mutate(
+ fam = "fam 2",
+ ID = ID + 10000,
+ momID = momID + 10000,
+ dadID = dadID + 10000,
+ spID = spID + 10000
+ )
+set.seed(1151)
+kpc <- 8
+ped3 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>%
+ mutate(
+ fam = "fam 3",
+ ID = ID + 20000,
+ momID = momID + 20000,
+ dadID = dadID + 20000,
+ spID = spID + 20000
+ )
+ped <- rbind(ped, ped2)
+ped <- rbind(ped, ped3)
+if(FALSE){
# Define parameters
component <- "additive" # Change this to test different components
saveable <- FALSE # Disable saving to avoid disk I/O slowing down benchmarking
@@ -52,9 +78,58 @@ benchmark_results <- microbenchmark(
times = 100 # Run each method 100 times
)
+summary(benchmark_results)
+
+lm(benchmark_results$time ~ benchmark_results$expr) %>%
+ summary()
+# Print benchmark results
+print(benchmark_results)
+
+# Optional: Save results to CSV for later analysis
+write.csv(summary(benchmark_results),
+ "benchmark_results.csv",
+ row.names = FALSE
+)
# Print benchmark
+}
+verbose=FALSE
+ad_ped_matrix <- ped2com(ped, component = "additive", adjacency_method = "direct", sparse = TRUE)
+mit_ped_matrix <- ped2com(ped, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE)
+cn_ped_matrix <- ped2com(ped, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE)
+benchmark_results <- microbenchmark(
+ beta = {
+ com2links.beta(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ cn_ped_matrix = cn_ped_matrix,
+ writetodisk = TRUE,
+ verbose = verbose
+); file.remove("dataRelatedPairs.csv")
+ }, regular = {
+ com2links(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ cn_ped_matrix = cn_ped_matrix,
+ writetodisk = TRUE,
+ verbose = verbose
+ ); file.remove("dataRelatedPairs.csv")
+ }, legacy = {
+ com2links(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ cn_ped_matrix = cn_ped_matrix,
+ verbose = verbose,
+ legacy = TRUE
+ ); file.remove("dataRelatedPairs.csv")
+ },
+
+ times = 100 # Run each method 100 times
+)
+summary(benchmark_results)
+lm(benchmark_results$time ~ benchmark_results$expr) %>%
+ summary()
# Print benchmark results
print(benchmark_results)
diff --git a/man/addPhantoms.Rd b/man/addRowlessParents.Rd
similarity index 75%
rename from man/addPhantoms.Rd
rename to man/addRowlessParents.Rd
index bafe5b75..8161ee4e 100644
--- a/man/addPhantoms.Rd
+++ b/man/addRowlessParents.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/checkParents.R
-\name{addPhantoms}
-\alias{addPhantoms}
+\name{addRowlessParents}
+\alias{addRowlessParents}
\title{Add Phantom Parents}
\usage{
-addPhantoms(ped, verbose, validation_results)
+addRowlessParents(ped, verbose, validation_results)
}
\arguments{
\item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.}
@@ -14,5 +14,5 @@ addPhantoms(ped, verbose, validation_results)
\item{validation_results}{validation results}
}
\description{
-This function adds phantom parents to a pedigree.
+This function adds parents w
}
diff --git a/tests/testthat/test-checkParents.R b/tests/testthat/test-checkParents.R
index 21660c4b..50916289 100644
--- a/tests/testthat/test-checkParents.R
+++ b/tests/testthat/test-checkParents.R
@@ -22,6 +22,6 @@ test_that("checksif single parents found correctly in ASOIAF dataset", {
single_moms <- length(df_asoiaf$id[is.na(df_asoiaf$dadID) & !is.na(df_asoiaf$momID)])
expect_equal(single_moms, length(results$missing_fathers))
expect_equal(single_dads, length(results$missing_mothers))
- repaired_df <- checkParentIDs(df_asoiaf, verbose = FALSE, repair = TRUE)
+ repaired_df <- checkParentIDs(df_asoiaf, verbose = FALSE, repair = TRUE, parentswithoutrow = TRUE)
expect_equal(nrow(repaired_df), nrow(df_asoiaf) + single_moms + single_dads)
})
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index 475f7b6b..200b8934 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -108,6 +108,17 @@ test_that("com2links legacy works", {
expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(written_data)))
+
+ result_beta <- com2links.beta(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix,
+ writetodisk = FALSE
+ )
+
+ expect_true(is.data.frame(result_beta))
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result_beta)))
+
+
result <- com2links(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix,
@@ -115,15 +126,94 @@ test_that("com2links legacy works", {
)
expect_true(is.data.frame(result))
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result)))
# Drop row names to avoid mismatches in expect_equal
rownames(result) <- NULL
rownames(written_data) <- NULL
+ rownames(result_beta) <- NULL
# Final comparison between written versions
expect_equal(written_data, result)
+ expect_equal(result_beta, result)
})
+test_that("com2links beta works", {
+ data(hazard)
+ ad_ped_matrix <- ped2com(hazard, component = "additive", adjacency_method = "direct", sparse = TRUE)
+ mit_ped_matrix <- ped2com(hazard, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE)
+ cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE)
+
+ # compare 2
+ result_beta <- com2links.beta(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ writetodisk = FALSE
+ )
+
+ expect_true(is.data.frame(result_beta))
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result_beta)))
+
+
+ result <- com2links(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ writetodisk = FALSE
+ )
+
+ expect_true(is.data.frame(result))
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result)))
+ # Drop row names to avoid mismatches in expect_equal
+ rownames(result) <- NULL
+ rownames(result_beta) <- NULL
+
+ # Final comparison between versions
+ expect_equal(result_beta, result)
+
+
+ # write to disk
+ result_disk <- com2links.beta(
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ writetodisk = TRUE
+ )
+ expect_true(file.exists("dataRelatedPairs.csv"))
+ written_data <- read.csv("dataRelatedPairs.csv")
+ # remove the file
+ expect_true(file.remove("dataRelatedPairs.csv"))
+
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(written_data)))
+ rownames(written_data) <- NULL
+ expect_equal(result_beta, written_data)
+ expect_equal(result, written_data)
+ # compare 1
+
+ result_beta <- com2links.beta(
+ mit_ped_matrix = mit_ped_matrix,
+ writetodisk = FALSE
+ )
+
+ expect_true(is.data.frame(result_beta))
+ expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result_beta)))
+
+
+ result <- com2links(
+ mit_ped_matrix = mit_ped_matrix,
+ writetodisk = FALSE
+ )
+
+ expect_true(is.data.frame(result))
+ expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result)))
+ # Drop row names to avoid mismatches in expect_equal
+ rownames(result) <- NULL
+ rownames(result_beta) <- NULL
+
+ # Final comparison between versions
+ expect_equal(result_beta, result)
+})
+
+
+
test_that("com2links correctly handles missing matrices", {
data(hazard)
From 61fb620c45124b84a590f7fac0c8740a951106fa Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 00:16:11 -0400
Subject: [PATCH 07/35] renaming beta
---
R/makeLinks.R | 21 ++++++++++++-----
benchmark_results.csv | 6 ++---
man/addRowlessParents.Rd | 4 ++--
man/com2links.Rd | 3 ---
man/com2links.legacy.Rd | 27 +++++++++++++++++++---
man/com2links.og.Rd | 41 +++++++++++++++++++++++++++++++++
tests/testthat/test-makeLinks.R | 14 +++++------
7 files changed, 92 insertions(+), 24 deletions(-)
create mode 100644 man/com2links.og.Rd
diff --git a/R/makeLinks.R b/R/makeLinks.R
index 930d3d2b..7bbdb082 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -20,8 +20,9 @@
#' @param ... Additional arguments to be passed to \code{\link{com2links}}
#'
#' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
-#' @export
-com2links <- function(
+#' @keywords internal
+
+com2links.legacy <- function(
rel_pairs_file = "dataRelatedPairs.csv",
ad_ped_matrix = NULL,
mit_ped_matrix = mt_ped_matrix,
@@ -498,7 +499,7 @@ com2links <- function(
} else if (legacy) {
# --- Legacy Mode ---
# In legacy mode, convert matrices to the expected symmetric formats.
- com2links.legacy(
+ com2links.og(
rel_pairs_file = rel_pairs_file,
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix,
@@ -521,9 +522,11 @@ com2links <- function(
#' Convert Pedigree Matrices to Related Pairs File (Legacy)
#' @description
#' This legacy function converts pedigree matrices into a related pairs file.
-#' @inheritParams com2links
+#' @inheritParams com2links.legacy
+#' @keywords internal
-com2links.legacy <- function(
+
+com2links.og <- function(
rel_pairs_file = "dataRelatedPairs.csv",
ad_ped_matrix = NULL,
mit_ped_matrix = mt_ped_matrix,
@@ -645,7 +648,13 @@ com2links.legacy <- function(
return(NULL)
}
-com2links.beta <- function(
+#' Convert Sparse Relationship Matrices to Kinship Links
+#' @inheritParams com2links.legacy
+#' @inherit com2links.legacy description
+#' @inherit com2links.legacy details
+#' @export
+
+com2links <- function(
rel_pairs_file = "dataRelatedPairs.csv",
ad_ped_matrix = NULL,
mit_ped_matrix = mt_ped_matrix,
diff --git a/benchmark_results.csv b/benchmark_results.csv
index efe84ed8..192b22dd 100644
--- a/benchmark_results.csv
+++ b/benchmark_results.csv
@@ -1,4 +1,4 @@
"expr","min","lq","mean","median","uq","max","neval"
-"beta",430.4392,516.5138,574.575919,563.7353,623.24015,956.2494,100
-"regular",424.8536,507.47875,578.514,564.8453,631.97425,997.8213,100
-"legacy",580.1082,1392.24995,2086.481802,1783.02635,2686.8328,4768.9119,100
+"beta",16.0127553,19.82506435,22.108722786,21.26392435,23.70929065,33.0821112,100
+"regular",16.1161818,19.80905795,22.26289046694,21.25613265,24.39315385,33.3530921,100
+"legacy",32.0135859,39.7116454,43.55598528601,42.62947545,47.32561085,65.684218301,100
diff --git a/man/addRowlessParents.Rd b/man/addRowlessParents.Rd
index 8161ee4e..83132805 100644
--- a/man/addRowlessParents.Rd
+++ b/man/addRowlessParents.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/checkParents.R
\name{addRowlessParents}
\alias{addRowlessParents}
-\title{Add Phantom Parents}
+\title{Add addRowlessParents}
\usage{
addRowlessParents(ped, verbose, validation_results)
}
@@ -14,5 +14,5 @@ addRowlessParents(ped, verbose, validation_results)
\item{validation_results}{validation results}
}
\description{
-This function adds parents w
+This function adds parents who appear in momID or dadID but are missing from ID
}
diff --git a/man/com2links.Rd b/man/com2links.Rd
index 9dae2f09..e9c7ac2b 100644
--- a/man/com2links.Rd
+++ b/man/com2links.Rd
@@ -50,9 +50,6 @@ com2links(
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
-\value{
-A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
-}
\description{
This function processes one or more sparse relationship components (additive, mitochondrial,
and common nuclear) and converts them into kinship link pairs. The resulting related pairs are
diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd
index 757117a1..a523a106 100644
--- a/man/com2links.legacy.Rd
+++ b/man/com2links.legacy.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/makeLinks.R
\name{com2links.legacy}
\alias{com2links.legacy}
-\title{Convert Pedigree Matrices to Related Pairs File (Legacy)}
+\title{Convert Sparse Relationship Matrices to Kinship Links}
\usage{
com2links.legacy(
rel_pairs_file = "dataRelatedPairs.csv",
@@ -10,9 +10,14 @@ com2links.legacy(
mit_ped_matrix = mt_ped_matrix,
mt_ped_matrix = NULL,
cn_ped_matrix = NULL,
- update_rate = 500,
+ write_buffer_size = 1000,
+ update_rate = 1000,
+ gc = TRUE,
+ writetodisk = TRUE,
verbose = FALSE,
+ legacy = FALSE,
outcome_name = "data",
+ drop_upper_triangular = TRUE,
...
)
}
@@ -27,14 +32,30 @@ com2links.legacy(
\item{cn_ped_matrix}{Matrix of common nuclear relatedness coefficients.}
+\item{write_buffer_size}{Number of related pairs to write to disk at a time.}
+
\item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.}
+\item{gc}{Logical. If TRUE, performs garbage collection via \code{\link{gc}} to free memory.}
+
+\item{writetodisk}{Logical. If TRUE, writes the related pairs to disk; if FALSE, returns a data frame.}
+
\item{verbose}{Logical. If TRUE, prints progress messages.}
+\item{legacy}{Logical. If TRUE, uses the legacy branch of the function.}
+
\item{outcome_name}{Character string representing the outcome name (used in file naming).}
+\item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.}
+
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
+\value{
+A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
+}
\description{
-This legacy function converts pedigree matrices into a related pairs file.
+This function processes one or more sparse relationship components (additive, mitochondrial,
+and common nuclear) and converts them into kinship link pairs. The resulting related pairs are
+either returned as a data frame or written to disk in CSV format.
}
+\keyword{internal}
diff --git a/man/com2links.og.Rd b/man/com2links.og.Rd
new file mode 100644
index 00000000..b6c3d71b
--- /dev/null
+++ b/man/com2links.og.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/makeLinks.R
+\name{com2links.og}
+\alias{com2links.og}
+\title{Convert Pedigree Matrices to Related Pairs File (Legacy)}
+\usage{
+com2links.og(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ update_rate = 500,
+ verbose = FALSE,
+ outcome_name = "data",
+ ...
+)
+}
+\arguments{
+\item{rel_pairs_file}{File path to write related pairs to (CSV format).}
+
+\item{ad_ped_matrix}{Matrix of additive genetic relatedness coefficients.}
+
+\item{mit_ped_matrix}{Matrix of mitochondrial relatedness coefficients. Alias: \code{mt_ped_matrix}.}
+
+\item{mt_ped_matrix}{Matrix of mitochondrial relatedness coefficients.}
+
+\item{cn_ped_matrix}{Matrix of common nuclear relatedness coefficients.}
+
+\item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.}
+
+\item{verbose}{Logical. If TRUE, prints progress messages.}
+
+\item{outcome_name}{Character string representing the outcome name (used in file naming).}
+
+\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
+}
+\description{
+This legacy function converts pedigree matrices into a related pairs file.
+}
+\keyword{internal}
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index 200b8934..a2666f2e 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -95,7 +95,7 @@ test_that("com2links legacy works", {
mit_ped_matrix <- ped2com(hazard, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE)
cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE)
- resultlegacy <- com2links(
+ resultlegacy <- com2links.legacy(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix,
legacy = TRUE
@@ -109,7 +109,7 @@ test_that("com2links legacy works", {
expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(written_data)))
- result_beta <- com2links.beta(
+ result_beta <- com2links(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix,
writetodisk = FALSE
@@ -119,7 +119,7 @@ test_that("com2links legacy works", {
expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result_beta)))
- result <- com2links(
+ result <- com2links.legacy(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix,
writetodisk = FALSE
@@ -145,7 +145,7 @@ test_that("com2links beta works", {
cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE)
# compare 2
- result_beta <- com2links.beta(
+ result_beta <- com2links(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix,
writetodisk = FALSE
@@ -155,7 +155,7 @@ test_that("com2links beta works", {
expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result_beta)))
- result <- com2links(
+ result <- com2links.legacy(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix,
writetodisk = FALSE
@@ -172,7 +172,7 @@ test_that("com2links beta works", {
# write to disk
- result_disk <- com2links.beta(
+ result_disk <- com2links(
ad_ped_matrix = ad_ped_matrix,
mit_ped_matrix = mit_ped_matrix,
writetodisk = TRUE
@@ -188,7 +188,7 @@ test_that("com2links beta works", {
expect_equal(result, written_data)
# compare 1
- result_beta <- com2links.beta(
+ result_beta <- com2links(
mit_ped_matrix = mit_ped_matrix,
writetodisk = FALSE
)
From 2c0961cc070dda438c5839cdd0dc46bcf32b5f6a Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 09:55:57 -0400
Subject: [PATCH 08/35] Update .gitignore
---
.gitignore | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index dc07da73..f84ea082 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,7 +12,7 @@ tests/testthat/Rplots.pdf
*.ASOIAF.ged
ASOIAF.ged
*.Rproj
-
+benchmark_results.csv
.vscode/launch.json
dataRelatedPairs_new2.csv
data-raw/ASOIAF_040725.ged
From edf27455ba0f12c93361cb0be118329e1afeeeef Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 10:03:16 -0400
Subject: [PATCH 09/35] reorder coms
---
R/makeLinks.R | 406 +++++++++++++++++++++++++-------------------------
1 file changed, 203 insertions(+), 203 deletions(-)
diff --git a/R/makeLinks.R b/R/makeLinks.R
index 7bbdb082..2894a3bf 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -20,7 +20,208 @@
#' @param ... Additional arguments to be passed to \code{\link{com2links}}
#'
#' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
-#' @keywords internal
+#' @export com2links
+
+com2links <- function(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ # pat_ped_matrix = NULL,
+ # mat_ped_matrix = NULL,
+ # mapa_id_file = "data_mapaID.csv",
+ write_buffer_size = 1000,
+ update_rate = 1000,
+ gc = TRUE,
+ writetodisk = TRUE,
+ verbose = FALSE,
+ legacy = FALSE,
+ outcome_name = "data",
+ drop_upper_triangular = TRUE,
+ ...) {
+ # --- Input Validations and Preprocessing ---
+
+ # Ensure that at least one relationship matrix is provided.
+ if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
+ stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
+ }
+ # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(ad_ped_matrix)) {
+ ad_ped_matrix <- validate_and_convert_matrix(
+ mat = ad_ped_matrix,
+ name = "ad_ped_matrix"
+ )
+ }
+
+ # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(cn_ped_matrix)) {
+ cn_ped_matrix <- validate_and_convert_matrix(
+ mat = cn_ped_matrix,
+ name = "cn_ped_matrix",
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # Validate and process mit_ped_matrix: convert and ensure binary values.
+ if (!is.null(mit_ped_matrix)) {
+ mit_ped_matrix <- validate_and_convert_matrix(
+ mat = mit_ped_matrix,
+ name = "mit_ped_matrix", force_binary = TRUE,
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # --- Build IDs and Prepare Matrix Pointers ---
+
+ # Extract individual IDs from the first available matrix.
+ ids <- NULL
+
+
+
+ if (!is.null(cn_ped_matrix)) {
+ ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
+ nc <- ncol(cn_ped_matrix)
+ } else if (!is.null(ad_ped_matrix)) {
+ ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
+ nc <- ncol(ad_ped_matrix)
+ } else if (!is.null(mit_ped_matrix)) {
+ ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
+ nc <- ncol(mit_ped_matrix)
+ }
+
+ if (is.null(ids)) {
+ stop("Could not extract IDs from the provided matrices.")
+ }
+
+ # --- matrix_case construction and switch dispatch ---
+ matrix_case <- paste(sort(c(
+ if (!is.null(ad_ped_matrix)) "ad" else NULL,
+ if (!is.null(mit_ped_matrix)) "mt" else NULL,
+ if (!is.null(cn_ped_matrix)) "cn" else NULL
+ )), collapse = "-")
+
+ if (verbose) {
+ print(matrix_case)
+ }
+
+ switch(matrix_case,
+ "ad" = process_one(
+ matrix = ad_ped_matrix,
+ rel_name = "addRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "mt" = process_one(
+ matrix = mit_ped_matrix,
+ rel_name = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn" = process_one(
+ matrix = cn_ped_matrix,
+ rel_name = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-mt" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = cn_ped_matrix,
+ name2 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn-mt" = process_two(
+ matrix1 = cn_ped_matrix,
+ name1 = "cnuRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn-mt" = process_all_three(
+ mat1 = ad_ped_matrix,
+ name1 = "addRel",
+ mat2 = mit_ped_matrix,
+ name2 = "mitRel",
+ mat3 = cn_ped_matrix,
+ name3 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ stop("Unsupported matrix combination")
+ )
+}
+#' Convert Sparse Relationship Matrices to Kinship Links
+#' @inheritParams com2links
+#' @inherit com2links description
+#' @inherit com2links details
+#' @keyword internal
com2links.legacy <- function(
rel_pairs_file = "dataRelatedPairs.csv",
@@ -522,7 +723,7 @@ com2links.legacy <- function(
#' Convert Pedigree Matrices to Related Pairs File (Legacy)
#' @description
#' This legacy function converts pedigree matrices into a related pairs file.
-#' @inheritParams com2links.legacy
+#' @inheritParams com2links
#' @keywords internal
@@ -648,207 +849,6 @@ com2links.og <- function(
return(NULL)
}
-#' Convert Sparse Relationship Matrices to Kinship Links
-#' @inheritParams com2links.legacy
-#' @inherit com2links.legacy description
-#' @inherit com2links.legacy details
-#' @export
-
-com2links <- function(
- rel_pairs_file = "dataRelatedPairs.csv",
- ad_ped_matrix = NULL,
- mit_ped_matrix = mt_ped_matrix,
- mt_ped_matrix = NULL,
- cn_ped_matrix = NULL,
- # pat_ped_matrix = NULL,
- # mat_ped_matrix = NULL,
- # mapa_id_file = "data_mapaID.csv",
- write_buffer_size = 1000,
- update_rate = 1000,
- gc = TRUE,
- writetodisk = TRUE,
- verbose = FALSE,
- legacy = FALSE,
- outcome_name = "data",
- drop_upper_triangular = TRUE,
- ...) {
- # --- Input Validations and Preprocessing ---
-
- # Ensure that at least one relationship matrix is provided.
- if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
- stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
- }
- # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(ad_ped_matrix)) {
- ad_ped_matrix <- validate_and_convert_matrix(
- mat = ad_ped_matrix,
- name = "ad_ped_matrix"
- )
- }
-
- # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(cn_ped_matrix)) {
- cn_ped_matrix <- validate_and_convert_matrix(
- mat = cn_ped_matrix,
- name = "cn_ped_matrix",
- ensure_symmetric = TRUE
- )
- }
-
- # Validate and process mit_ped_matrix: convert and ensure binary values.
- if (!is.null(mit_ped_matrix)) {
- mit_ped_matrix <- validate_and_convert_matrix(
- mat = mit_ped_matrix,
- name = "mit_ped_matrix", force_binary = TRUE,
- ensure_symmetric = TRUE
- )
- }
-
- # --- Build IDs and Prepare Matrix Pointers ---
-
- # Extract individual IDs from the first available matrix.
- ids <- NULL
-
-
-
- if (!is.null(cn_ped_matrix)) {
- ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
- nc <- ncol(cn_ped_matrix)
- } else if (!is.null(ad_ped_matrix)) {
- ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
- nc <- ncol(ad_ped_matrix)
- } else if (!is.null(mit_ped_matrix)) {
- ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
- nc <- ncol(mit_ped_matrix)
- }
-
- if (is.null(ids)) {
- stop("Could not extract IDs from the provided matrices.")
- }
-
- # --- matrix_case construction and switch dispatch ---
- matrix_case <- paste(sort(c(
- if (!is.null(ad_ped_matrix)) "ad" else NULL,
- if (!is.null(mit_ped_matrix)) "mt" else NULL,
- if (!is.null(cn_ped_matrix)) "cn" else NULL
- )), collapse = "-")
-
- if (verbose) {
- print(matrix_case)
- }
-
- switch(matrix_case,
- "ad" = process_one(
- matrix = ad_ped_matrix,
- rel_name = "addRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "mt" = process_one(
- matrix = mit_ped_matrix,
- rel_name = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "cn" = process_one(
- matrix = cn_ped_matrix,
- rel_name = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-mt" = process_two(
- matrix1 = ad_ped_matrix,
- name1 = "addRel",
- matrix2 = mit_ped_matrix,
- name2 = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-cn" = process_two(
- matrix1 = ad_ped_matrix,
- name1 = "addRel",
- matrix2 = cn_ped_matrix,
- name2 = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "cn-mt" = process_two(
- matrix1 = cn_ped_matrix,
- name1 = "cnuRel",
- matrix2 = mit_ped_matrix,
- name2 = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-cn-mt" = process_all_three(
- mat1 = ad_ped_matrix,
- name1 = "addRel",
- mat2 = mit_ped_matrix,
- name2 = "mitRel",
- mat3 = cn_ped_matrix,
- name3 = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- stop("Unsupported matrix combination")
- )
-}
process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) {
# Extract pointers and indices from the matrix.
From 39fde6cb5f2c3d9309ed374b224d4e84f0a56fda Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 10:03:51 -0400
Subject: [PATCH 10/35] Delete benchmark_results.csv
---
benchmark_results.csv | 4 ----
1 file changed, 4 deletions(-)
delete mode 100644 benchmark_results.csv
diff --git a/benchmark_results.csv b/benchmark_results.csv
deleted file mode 100644
index 192b22dd..00000000
--- a/benchmark_results.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-"expr","min","lq","mean","median","uq","max","neval"
-"beta",16.0127553,19.82506435,22.108722786,21.26392435,23.70929065,33.0821112,100
-"regular",16.1161818,19.80905795,22.26289046694,21.25613265,24.39315385,33.3530921,100
-"legacy",32.0135859,39.7116454,43.55598528601,42.62947545,47.32561085,65.684218301,100
From 015c32abfa9461162f0b9c5ffc0f7f47ca3bc1cb Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 10:03:16 -0400
Subject: [PATCH 11/35] reorder coms
---
R/makeLinks.R | 403 ++++++++++++++++++++--------------------
man/com2links.Rd | 3 +
man/com2links.legacy.Rd | 3 -
3 files changed, 204 insertions(+), 205 deletions(-)
diff --git a/R/makeLinks.R b/R/makeLinks.R
index 7bbdb082..f0bee11e 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -20,8 +20,208 @@
#' @param ... Additional arguments to be passed to \code{\link{com2links}}
#'
#' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
+#' @export com2links
+
+com2links <- function(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ # pat_ped_matrix = NULL,
+ # mat_ped_matrix = NULL,
+ # mapa_id_file = "data_mapaID.csv",
+ write_buffer_size = 1000,
+ update_rate = 1000,
+ gc = TRUE,
+ writetodisk = TRUE,
+ verbose = FALSE,
+ legacy = FALSE,
+ outcome_name = "data",
+ drop_upper_triangular = TRUE,
+ ...) {
+ # --- Input Validations and Preprocessing ---
+
+ # Ensure that at least one relationship matrix is provided.
+ if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
+ stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
+ }
+ # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(ad_ped_matrix)) {
+ ad_ped_matrix <- validate_and_convert_matrix(
+ mat = ad_ped_matrix,
+ name = "ad_ped_matrix"
+ )
+ }
+
+ # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(cn_ped_matrix)) {
+ cn_ped_matrix <- validate_and_convert_matrix(
+ mat = cn_ped_matrix,
+ name = "cn_ped_matrix",
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # Validate and process mit_ped_matrix: convert and ensure binary values.
+ if (!is.null(mit_ped_matrix)) {
+ mit_ped_matrix <- validate_and_convert_matrix(
+ mat = mit_ped_matrix,
+ name = "mit_ped_matrix", force_binary = TRUE,
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # --- Build IDs and Prepare Matrix Pointers ---
+
+ # Extract individual IDs from the first available matrix.
+ ids <- NULL
+
+
+
+ if (!is.null(cn_ped_matrix)) {
+ ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
+ nc <- ncol(cn_ped_matrix)
+ } else if (!is.null(ad_ped_matrix)) {
+ ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
+ nc <- ncol(ad_ped_matrix)
+ } else if (!is.null(mit_ped_matrix)) {
+ ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
+ nc <- ncol(mit_ped_matrix)
+ }
+
+ if (is.null(ids)) {
+ stop("Could not extract IDs from the provided matrices.")
+ }
+
+ # --- matrix_case construction and switch dispatch ---
+ matrix_case <- paste(sort(c(
+ if (!is.null(ad_ped_matrix)) "ad" else NULL,
+ if (!is.null(mit_ped_matrix)) "mt" else NULL,
+ if (!is.null(cn_ped_matrix)) "cn" else NULL
+ )), collapse = "-")
+
+ if (verbose) {
+ print(matrix_case)
+ }
+
+ switch(matrix_case,
+ "ad" = process_one(
+ matrix = ad_ped_matrix,
+ rel_name = "addRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "mt" = process_one(
+ matrix = mit_ped_matrix,
+ rel_name = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn" = process_one(
+ matrix = cn_ped_matrix,
+ rel_name = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-mt" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn" = process_two(
+ matrix1 = ad_ped_matrix,
+ name1 = "addRel",
+ matrix2 = cn_ped_matrix,
+ name2 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "cn-mt" = process_two(
+ matrix1 = cn_ped_matrix,
+ name1 = "cnuRel",
+ matrix2 = mit_ped_matrix,
+ name2 = "mitRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ "ad-cn-mt" = process_all_three(
+ mat1 = ad_ped_matrix,
+ name1 = "addRel",
+ mat2 = mit_ped_matrix,
+ name2 = "mitRel",
+ mat3 = cn_ped_matrix,
+ name3 = "cnuRel",
+ ids = ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc,
+ ...
+ ),
+ stop("Unsupported matrix combination")
+ )
+}
+#' Convert Sparse Relationship Matrices to Kinship Links
+#' @inheritParams com2links
#' @keywords internal
+
com2links.legacy <- function(
rel_pairs_file = "dataRelatedPairs.csv",
ad_ped_matrix = NULL,
@@ -522,7 +722,7 @@ com2links.legacy <- function(
#' Convert Pedigree Matrices to Related Pairs File (Legacy)
#' @description
#' This legacy function converts pedigree matrices into a related pairs file.
-#' @inheritParams com2links.legacy
+#' @inheritParams com2links
#' @keywords internal
@@ -648,207 +848,6 @@ com2links.og <- function(
return(NULL)
}
-#' Convert Sparse Relationship Matrices to Kinship Links
-#' @inheritParams com2links.legacy
-#' @inherit com2links.legacy description
-#' @inherit com2links.legacy details
-#' @export
-
-com2links <- function(
- rel_pairs_file = "dataRelatedPairs.csv",
- ad_ped_matrix = NULL,
- mit_ped_matrix = mt_ped_matrix,
- mt_ped_matrix = NULL,
- cn_ped_matrix = NULL,
- # pat_ped_matrix = NULL,
- # mat_ped_matrix = NULL,
- # mapa_id_file = "data_mapaID.csv",
- write_buffer_size = 1000,
- update_rate = 1000,
- gc = TRUE,
- writetodisk = TRUE,
- verbose = FALSE,
- legacy = FALSE,
- outcome_name = "data",
- drop_upper_triangular = TRUE,
- ...) {
- # --- Input Validations and Preprocessing ---
-
- # Ensure that at least one relationship matrix is provided.
- if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
- stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
- }
- # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(ad_ped_matrix)) {
- ad_ped_matrix <- validate_and_convert_matrix(
- mat = ad_ped_matrix,
- name = "ad_ped_matrix"
- )
- }
-
- # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(cn_ped_matrix)) {
- cn_ped_matrix <- validate_and_convert_matrix(
- mat = cn_ped_matrix,
- name = "cn_ped_matrix",
- ensure_symmetric = TRUE
- )
- }
-
- # Validate and process mit_ped_matrix: convert and ensure binary values.
- if (!is.null(mit_ped_matrix)) {
- mit_ped_matrix <- validate_and_convert_matrix(
- mat = mit_ped_matrix,
- name = "mit_ped_matrix", force_binary = TRUE,
- ensure_symmetric = TRUE
- )
- }
-
- # --- Build IDs and Prepare Matrix Pointers ---
-
- # Extract individual IDs from the first available matrix.
- ids <- NULL
-
-
-
- if (!is.null(cn_ped_matrix)) {
- ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
- nc <- ncol(cn_ped_matrix)
- } else if (!is.null(ad_ped_matrix)) {
- ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
- nc <- ncol(ad_ped_matrix)
- } else if (!is.null(mit_ped_matrix)) {
- ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
- nc <- ncol(mit_ped_matrix)
- }
-
- if (is.null(ids)) {
- stop("Could not extract IDs from the provided matrices.")
- }
-
- # --- matrix_case construction and switch dispatch ---
- matrix_case <- paste(sort(c(
- if (!is.null(ad_ped_matrix)) "ad" else NULL,
- if (!is.null(mit_ped_matrix)) "mt" else NULL,
- if (!is.null(cn_ped_matrix)) "cn" else NULL
- )), collapse = "-")
-
- if (verbose) {
- print(matrix_case)
- }
-
- switch(matrix_case,
- "ad" = process_one(
- matrix = ad_ped_matrix,
- rel_name = "addRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "mt" = process_one(
- matrix = mit_ped_matrix,
- rel_name = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "cn" = process_one(
- matrix = cn_ped_matrix,
- rel_name = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-mt" = process_two(
- matrix1 = ad_ped_matrix,
- name1 = "addRel",
- matrix2 = mit_ped_matrix,
- name2 = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-cn" = process_two(
- matrix1 = ad_ped_matrix,
- name1 = "addRel",
- matrix2 = cn_ped_matrix,
- name2 = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "cn-mt" = process_two(
- matrix1 = cn_ped_matrix,
- name1 = "cnuRel",
- matrix2 = mit_ped_matrix,
- name2 = "mitRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- "ad-cn-mt" = process_all_three(
- mat1 = ad_ped_matrix,
- name1 = "addRel",
- mat2 = mit_ped_matrix,
- name2 = "mitRel",
- mat3 = cn_ped_matrix,
- name3 = "cnuRel",
- ids = ids,
- nc = nc,
- rel_pairs_file = rel_pairs_file,
- writetodisk = writetodisk,
- write_buffer_size = write_buffer_size,
- drop_upper_triangular = drop_upper_triangular,
- update_rate = update_rate,
- verbose = verbose,
- gc = gc,
- ...
- ),
- stop("Unsupported matrix combination")
- )
-}
process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) {
# Extract pointers and indices from the matrix.
diff --git a/man/com2links.Rd b/man/com2links.Rd
index e9c7ac2b..9dae2f09 100644
--- a/man/com2links.Rd
+++ b/man/com2links.Rd
@@ -50,6 +50,9 @@ com2links(
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
+\value{
+A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
+}
\description{
This function processes one or more sparse relationship components (additive, mitochondrial,
and common nuclear) and converts them into kinship link pairs. The resulting related pairs are
diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd
index a523a106..bf4bc6e4 100644
--- a/man/com2links.legacy.Rd
+++ b/man/com2links.legacy.Rd
@@ -50,9 +50,6 @@ com2links.legacy(
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
-\value{
-A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
-}
\description{
This function processes one or more sparse relationship components (additive, mitochondrial,
and common nuclear) and converts them into kinship link pairs. The resulting related pairs are
From 0850662838bae94e354bd4898d2ae71c8682770e Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 10:26:25 -0400
Subject: [PATCH 12/35] Update test-makeLinks.R
---
tests/testthat/test-makeLinks.R | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index a2666f2e..51bdcb49 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -25,6 +25,18 @@ test_that("com2links produces correct output with a single relationship matrix (
expect_true(all(result$addRel >= 0)) # Relatedness values should be non-negative
})
+test_that("com2links produces correct output with cn_ped_matrix", {
+ data(ASOIAF)
+ cn_ped_matrix <- ped2mit(ASOIAF, sparse = TRUE)
+
+ result <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
+
+ expect_true(is.data.frame(result))
+ expect_true(all(c("ID1", "ID2", "cnRel") %in% colnames(result)))
+ expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel
+ expect_true(all(result$cnRel >= 0)) # Relatedness values should be non-negative
+})
+
test_that("com2links produces correct output with mt_ped_matrix", {
data(hazard)
mit_ped_matrix <- ped2mit(hazard, sparse = TRUE)
@@ -34,7 +46,7 @@ test_that("com2links produces correct output with mt_ped_matrix", {
expect_true(is.data.frame(result))
expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result)))
expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel
- expect_true(all(result$addRel >= 0)) # Relatedness values should be non-negative
+ expect_true(all(result$mitRel %in% c(0, 1))) # Mitochondrial should be binary
})
test_that("com2links processes multiple matrices correctly (hazard dataset)", {
From c5eabef3ccce44d39580740a9a6799ea8ceced08 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 10:58:00 -0400
Subject: [PATCH 13/35] splitting links into legacy
---
R/makeLinks.R | 635 +-------------------------------
R/makeLinkslegacy.R | 631 +++++++++++++++++++++++++++++++
tests/testthat/test-makeLinks.R | 55 ++-
3 files changed, 684 insertions(+), 637 deletions(-)
create mode 100644 R/makeLinkslegacy.R
diff --git a/R/makeLinks.R b/R/makeLinks.R
index f0bee11e..0156717c 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -222,632 +222,6 @@ com2links <- function(
#' @keywords internal
-com2links.legacy <- function(
- rel_pairs_file = "dataRelatedPairs.csv",
- ad_ped_matrix = NULL,
- mit_ped_matrix = mt_ped_matrix,
- mt_ped_matrix = NULL,
- cn_ped_matrix = NULL,
- # pat_ped_matrix = NULL,
- # mat_ped_matrix = NULL,
- # mapa_id_file = "data_mapaID.csv",
- write_buffer_size = 1000,
- update_rate = 1000,
- gc = TRUE,
- writetodisk = TRUE,
- verbose = FALSE,
- legacy = FALSE,
- outcome_name = "data",
- drop_upper_triangular = TRUE,
- ...) {
- # Non-legacy mode processing
-
- if (!legacy) {
- # --- Input Validations and Preprocessing ---
-
- # Ensure that at least one relationship matrix is provided.
- if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
- stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
- }
- # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(ad_ped_matrix)) {
- ad_ped_matrix <- validate_and_convert_matrix(
- mat = ad_ped_matrix,
- name = "ad_ped_matrix"
- )
- }
-
- # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
- if (!is.null(cn_ped_matrix)) {
- cn_ped_matrix <- validate_and_convert_matrix(
- mat = cn_ped_matrix,
- name = "cn_ped_matrix",
- ensure_symmetric = TRUE
- )
- }
-
- # Validate and process mit_ped_matrix: convert and ensure binary values.
- if (!is.null(mit_ped_matrix)) {
- mit_ped_matrix <- validate_and_convert_matrix(
- mat = mit_ped_matrix,
- name = "mit_ped_matrix", force_binary = TRUE,
- ensure_symmetric = TRUE
- )
- }
-
- # --- Build IDs and Prepare Matrix Pointers ---
-
- # Extract individual IDs from the first available matrix.
- ids <- NULL
-
-
-
- if (!is.null(cn_ped_matrix)) {
- ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
- nc <- ncol(cn_ped_matrix)
- } else if (!is.null(ad_ped_matrix)) {
- ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
- nc <- ncol(ad_ped_matrix)
- } else if (!is.null(mit_ped_matrix)) {
- ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
- nc <- ncol(mit_ped_matrix)
- }
-
- if (is.null(ids)) {
- stop("Could not extract IDs from the provided matrices.")
- }
-
-
- # Construct case identifier
- matrix_case <- paste(sort(c(
- if (!is.null(ad_ped_matrix)) "ad" else NULL,
- if (!is.null(mit_ped_matrix)) "mt" else NULL,
- if (!is.null(cn_ped_matrix)) "cn" else NULL
- )), collapse = "-")
-
-
- # Count how many matrices are provided.
- sum_nulls <- sum(!is.null(ad_ped_matrix),
- !is.null(mit_ped_matrix),
- !is.null(cn_ped_matrix),
- na.rm = TRUE
- )
- if (verbose) {
- print(matrix_case)
- }
-
- # Extract the internal pointers (p, i, and x slots) for each provided matrix.
- if (!is.null(ad_ped_matrix)) {
- ad_ped_p <- ad_ped_matrix@p + 1L
- ad_ped_i <- ad_ped_matrix@i + 1L
- ad_ped_x <- ad_ped_matrix@x
- }
- if (!is.null(mit_ped_matrix)) {
- mt_p <- mit_ped_matrix@p + 1L
- mt_i <- mit_ped_matrix@i + 1L
- mt_x <- mit_ped_matrix@x
- }
- if (!is.null(cn_ped_matrix)) {
- cn_p <- cn_ped_matrix@p + 1L
- cn_i <- cn_ped_matrix@i + 1L
- cn_x <- cn_ped_matrix@x
- }
-
- # --- Process Based on the Number of Provided Matrices ---
- # --- Case: All Three Matrices Provided ---
- if (sum_nulls == 3) {
- # Set pointers for all three matrices.
- newColPos1 <- ad_ped_p
- iss1 <- ad_ped_i
- x1 <- ad_ped_x
-
- newColPos2 <- mt_p
- iss2 <- mt_i
- x2 <- mt_x
-
- newColPos3 <- cn_p
- iss3 <- cn_i
- x3 <- cn_x
-
- # Define relationship column names.
- relNames <- c("addRel", "mitRel", "cnuRel")
-
- # Optionally remove the original pointers to free memory.
- if (gc == TRUE) {
- remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x, cn_p, cn_i, cn_x)
- }
- if (verbose) {
- message("All 3 matrix is present")
- }
-
- # File names
- # rel_pairs_file <- paste0(outcome_name, "_dataRelatedPairs.csv")
- # mapa_id_file <- paste0(outcome_name, "_data_mapaID.csv")
-
- # Initialize the related pairs file with headers.
- df_relpairs <- initialize_empty_df(relNames = relNames)
-
- # Write the headers to the related pairs file.
- if (writetodisk == TRUE) {
- utils::write.table(
- df_relpairs,
- file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
- )
-
- # Prepare an empty buffer for batching writes.
- write_buffer <- list()
- remove(df_relpairs)
- }
-
- # Loop over each column (individual) in the matrix.
- for (j in 1L:nc) {
- ID2 <- ids[j]
-
- # Extract column indices for the 1st component
- ncp1 <- newColPos1[j]
- ncp1p <- newColPos1[j + 1L]
- cond1 <- ncp1 < ncp1p
- if (cond1) {
- vv1 <- ncp1:(ncp1p - 1L)
- iss1vv <- iss1[vv1]
- }
- # Extract indices for the 2nd component
- ncp2 <- newColPos2[j]
- ncp2p <- newColPos2[j + 1L]
- cond2 <- ncp2 < ncp2p
- if (cond2) {
- vv2 <- ncp2:(ncp2p - 1L)
- iss2vv <- iss2[vv2]
- }
-
- # Extract indices for the 3rd component
- ncp3 <- newColPos3[j]
- ncp3p <- newColPos3[j + 1L]
- cond3 <- ncp3 < ncp3p
- if (cond3) {
- vv3 <- ncp3:(ncp3p - 1L)
- iss3vv <- iss3[vv3]
- }
-
- # Create a unique, sorted set of row indices from all provided matrices.
- u <- sort(igraph::union(igraph::union(if (cond1) {
- iss1vv
- }, if (cond2) {
- iss2vv
- }), if (cond3) {
- iss3vv
- }))
-
- # If any relationships exist for this individual, build the related pairs.
- if (cond1 || cond2 || cond3) {
- ID1 <- ids[u]
- tds <- data.frame(ID1 = ID1, ID2 = ID2)
- tds[[relNames[1]]] <- 0
- tds[[relNames[2]]] <- 0
- tds[[relNames[3]]] <- 0
-
- # Assign the relationship coefficients from each matrix.
- if (cond1) {
- tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
- }
- if (cond2) {
- tds[u %in% iss2vv, relNames[2]] <- x2[vv2]
- }
- if (cond3) {
- tds[u %in% iss3vv, relNames[3]] <- x3[vv3]
- }
-
- # Optionally drop upper-triangular entries.
- if (drop_upper_triangular == TRUE) {
- tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
- }
-
- # Write the batch to disk or accumulate in the data frame.
- if (nrow(tds) > 0) {
- if (writetodisk == TRUE) {
- write_buffer[[length(write_buffer) + 1]] <- tds
-
- if (length(write_buffer) >= write_buffer_size) { # Write in batches
- utils::write.table(do.call(rbind, write_buffer),
- file = rel_pairs_file,
- row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
- )
- write_buffer <- list()
- }
- } else {
- df_relpairs <- rbind(df_relpairs, tds)
- }
- }
- }
- if (verbose && (j %% update_rate == 0L)) {
- cat("Done with", j, "of", nc, "\n")
- }
- }
- } else if (sum_nulls == 2) {
- # --- Case: Two Matrices Provided ---
- # Set pointers and relationship names based on which matrix is missing.
-
- if (is.null(ad_ped_matrix)) {
- newColPos1 <- mt_p
- iss1 <- mt_i
- x1 <- mt_x
- newColPos2 <- cn_p
- iss2 <- cn_i
- x2 <- cn_x
- relNames <- c("mitRel", "cnuRel")
- if (gc == TRUE) {
- remove(mt_p, mt_i, mt_x, cn_p, cn_i, cn_x)
- }
- }
- if (is.null(mit_ped_matrix)) {
- newColPos1 <- ad_ped_p
- iss1 <- ad_ped_i
- x1 <- ad_ped_x
- newColPos2 <- cn_p
- iss2 <- cn_i
- x2 <- cn_x
- relNames <- c("addRel", "cnuRel")
- if (gc == TRUE) {
- remove(ad_ped_p, ad_ped_i, ad_ped_x, cn_p, cn_i, cn_x)
- }
- }
- if (is.null(cn_ped_matrix)) {
- newColPos1 <- ad_ped_p
- iss1 <- ad_ped_i
- x1 <- ad_ped_x
- newColPos2 <- mt_p
- iss2 <- mt_i
- x2 <- mt_x
- relNames <- c("addRel", "mitRel")
- if (gc == TRUE) {
- remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x)
- }
- }
-
- # Initialize the related pairs file with the appropriate headers.
- df_relpairs <- initialize_empty_df(relNames = relNames)
-
- if (writetodisk == TRUE) {
- utils::write.table(
- df_relpairs,
- file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
- )
- # initial buffer
- write_buffer <- list()
- remove(df_relpairs)
- }
-
- # Process each column to extract relationships.
- for (j in 1L:nc) {
- ID2 <- ids[j]
-
- # Extract indices from the first matrix.
- ncp1 <- newColPos1[j]
- ncp1p <- newColPos1[j + 1L]
- cond1 <- ncp1 < ncp1p
- if (cond1) {
- vv1 <- ncp1:(ncp1p - 1L)
- iss1vv <- iss1[vv1]
- }
- # Extract indices from the second matrix.
- ncp2 <- newColPos2[j]
- ncp2p <- newColPos2[j + 1L]
- cond2 <- ncp2 < ncp2p
- if (cond2) {
- vv2 <- ncp2:(ncp2p - 1L)
- iss2vv <- iss2[vv2]
- }
-
- # Merge the indices from both matrices.
- u <- sort(igraph::union(if (cond1) {
- iss1vv
- }, if (cond2) {
- iss2vv
- }))
-
- # Create related pairs if relationships are found.
- if (cond1 || cond2) {
- ID1 <- ids[u]
- tds <- data.frame(ID1 = ID1, ID2 = ID2)
- tds[[relNames[1]]] <- 0
- tds[[relNames[2]]] <- 0
-
- if (cond1) {
- tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
- }
- if (cond2) {
- tds[u %in% iss2vv, relNames[2]] <- x2[vv2]
- }
- if (drop_upper_triangular == TRUE) {
- tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
- }
-
- # Write the batch to disk or accumulate in the data frame.
- if (nrow(tds) > 0) {
- if (writetodisk == TRUE) {
- write_buffer[[length(write_buffer) + 1]] <- tds
-
- if (length(write_buffer) >= write_buffer_size) { # Write in batches
- utils::write.table(do.call(rbind, write_buffer),
- file = rel_pairs_file,
- row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
- )
- write_buffer <- list()
- }
- } else {
- df_relpairs <- rbind(df_relpairs, tds)
- }
- }
- }
- if (verbose && (j %% update_rate == 0L)) {
- cat("Done with", j, "of", nc, "\n")
- }
- }
- } else if (sum_nulls == 1) {
- # --- Case: Only One Matrix Provided ---
- if (verbose) {
- message("Only one matrix is present")
- }
- if (!is.null(ad_ped_matrix)) {
- newColPos1 <- ad_ped_p
- iss1 <- ad_ped_i
- x1 <- ad_ped_x
- relNames <- c("addRel")
- if (gc == TRUE) {
- remove(ad_ped_p, ad_ped_i, ad_ped_x)
- }
- }
- if (!is.null(mit_ped_matrix)) {
- newColPos1 <- mt_p
- iss1 <- mt_i
- x1 <- mt_x
- relNames <- c("mitRel")
- if (gc == TRUE) {
- remove(mt_p, mt_i, mt_x)
- }
- }
- if (!is.null(cn_ped_matrix)) {
- newColPos1 <- cn_p
- iss1 <- cn_i
- x1 <- cn_x
- relNames <- c("cnuRel")
- if (gc == TRUE) {
- remove(cn_p, cn_i, cn_x)
- }
- }
-
- # Initialize the related pairs file.
- df_relpairs <- initialize_empty_df(relNames = relNames)
-
- if (writetodisk == TRUE) {
- utils::write.table(
- df_relpairs,
- file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
- )
-
- # initial buffer
- write_buffer <- list()
-
- remove(df_relpairs)
- }
-
- # Process each column.
- for (j in 1L:nc) {
- ID2 <- ids[j]
- # Extract column indices
- ncp1 <- newColPos1[j]
- ncp1p <- newColPos1[j + 1L]
- cond1 <- ncp1 < ncp1p
- if (cond1) {
- vv1 <- ncp1:(ncp1p - 1L)
- iss1vv <- iss1[vv1]
- }
-
- # Use the indices from the single matrix.
- u <- sort(iss1vv)
-
- if (cond1) {
- ID1 <- ids[u]
- tds <- data.frame(ID1 = ID1, ID2 = ID2)
- tds[[relNames[1]]] <- 0
-
- if (cond1) {
- tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
- }
- if (drop_upper_triangular == TRUE) {
- tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
- }
-
- # Write the batch to disk or accumulate in the data frame.
- if (nrow(tds) > 0) {
- if (writetodisk == TRUE) {
- write_buffer[[length(write_buffer) + 1]] <- tds
-
- if (length(write_buffer) >= write_buffer_size) { # Write in batches
- utils::write.table(do.call(rbind, write_buffer),
- file = rel_pairs_file,
- row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
- )
- write_buffer <- list()
- }
- } else {
- df_relpairs <- rbind(df_relpairs, tds)
- }
- }
- }
- if (verbose && (j %% update_rate == 0L)) {
- cat("Done with", j, "of", nc, "\n")
- }
- }
- } else {
- stop("No matrices provided")
- }
-
- # If not writing to disk, return the accumulated data frame.
- if (writetodisk == FALSE) {
- return(df_relpairs)
- } else {
- # Write any remaining buffered rows.
- if (length(write_buffer) > 0) {
- utils::write.table(do.call(rbind, write_buffer),
- file = rel_pairs_file,
- row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
- )
- }
- # return(NULL)
- }
- } else if (legacy) {
- # --- Legacy Mode ---
- # In legacy mode, convert matrices to the expected symmetric formats.
- com2links.og(
- rel_pairs_file = rel_pairs_file,
- ad_ped_matrix = ad_ped_matrix,
- mit_ped_matrix = mit_ped_matrix,
- cn_ped_matrix = cn_ped_matrix,
- update_rate = update_rate,
- verbose = verbose,
- outcome_name = outcome_name
- )
- return(NULL)
- }
-
- # --- End of Legacy Mode ---
-
- # Merge and write the parentage matrices
- # df <- full_join(mat_ped_matrix %>% arrange(ID), pat_ped_matrix %>% arrange(ID))
-
- # write.table(df, file = mapa_id_file, sep = ",", append = FALSE, row.names = FALSE)
-}
-
-#' Convert Pedigree Matrices to Related Pairs File (Legacy)
-#' @description
-#' This legacy function converts pedigree matrices into a related pairs file.
-#' @inheritParams com2links
-#' @keywords internal
-
-
-com2links.og <- function(
- rel_pairs_file = "dataRelatedPairs.csv",
- ad_ped_matrix = NULL,
- mit_ped_matrix = mt_ped_matrix,
- mt_ped_matrix = NULL,
- cn_ped_matrix = NULL,
- update_rate = 500,
- verbose = FALSE,
- outcome_name = "data",
- ...) {
- # --- Legacy Mode ---
- if (verbose) {
- message("Using legacy mode")
- }
- # In legacy mode, convert matrices to the expected symmetric formats.
-
- # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata'))
- # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix")
- # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata'))
- # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata'))
-
- # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv')
-
- biggestMtPed <- mit_ped_matrix
- remove(mit_ped_matrix)
- biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix")
- remove(cn_ped_matrix)
- biggestPed <- ad_ped_matrix
- remove(ad_ped_matrix)
- biggestMtPed@x[biggestMtPed@x > 0] <- 1
-
- # Set the output file name.
- if (exists("rel_pairs_file")) {
- fname <- rel_pairs_file
- } else {
- fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv")
- }
- # Initialize the output file with headers.
- ds <- data.frame(
- ID1 = numeric(0), ID2 = numeric(0),
- addRel = numeric(0),
- mitRel = numeric(0), cnuRel = numeric(0)
- )
-
- utils::write.table(ds,
- file = fname, sep = ",",
- append = FALSE, row.names = FALSE
- )
-
- # Extract IDs from the common nuclear matrix.
- ids <- as.numeric(dimnames(biggestCnPed)[[1]])
-
- # Extract pointers from the legacy matrices.
- newColPos1 <- biggestPed@p + 1L
- iss1 <- biggestPed@i + 1L
- newColPos2 <- biggestMtPed@p + 1L
- iss2 <- biggestMtPed@i + 1L
- newColPos3 <- biggestCnPed@p + 1L
- iss3 <- biggestCnPed@i + 1L
- nc <- ncol(biggestPed)
-
- # Process each individual.
- for (j in 1L:nc) {
- ID2 <- ids[j]
- ncp1 <- newColPos1[j]
- ncp1p <- newColPos1[j + 1L]
- cond1 <- ncp1 < ncp1p
- if (cond1) {
- vv1 <- ncp1:(ncp1p - 1L)
- iss1vv <- iss1[vv1]
- }
- ncp2 <- newColPos2[j]
- ncp2p <- newColPos2[j + 1L]
- cond2 <- ncp2 < ncp2p
- if (cond2) {
- vv2 <- ncp2:(ncp2p - 1L)
- iss2vv <- iss2[vv2]
- }
- ncp3 <- newColPos3[j]
- ncp3p <- newColPos3[j + 1L]
- cond3 <- ncp3 < ncp3p
- if (cond3) {
- vv3 <- ncp3:(ncp3p - 1L)
- iss3vv <- iss3[vv3]
- }
-
- # Merge indices from all three matrices.
- u <- sort(igraph::union(igraph::union(if (cond1) {
- iss1vv
- }, if (cond2) {
- iss2vv
- }), if (cond3) {
- iss3vv
- }))
- # browser()
- if (cond1 || cond2 || cond3) {
- ID1 <- ids[u]
- tds <- data.frame(
- ID1 = ID1, ID2 = ID2,
- addRel = 0, mitRel = 0, cnuRel = 0
- )
- if (cond1) {
- tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1]
- }
- if (cond2) {
- tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2]
- }
- if (cond3) {
- tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3]
- }
- utils::write.table(tds,
- file = fname, row.names = FALSE,
- col.names = FALSE, append = TRUE, sep = ","
- )
- }
- if (!(j %% update_rate)) {
- cat(paste0("Done with ", j, " of ", nc, "\n"))
- }
- }
- return(NULL)
-}
-
process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) {
# Extract pointers and indices from the matrix.
@@ -1149,11 +523,12 @@ process_two <- function(
#'
#' @return The validated and converted matrix.
validate_and_convert_matrix <- function(mat, name, ensure_symmetric = FALSE, force_binary = FALSE) {
- if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix"))) {
- stop(paste0("The '", name, "' must be a matrix or dgCMatrix."))
+ if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix","generalMatrix",
+ "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix"))) {
+ stop(paste0("The '", name, "' must be a matrix or generalMatrix"))
}
- if (!inherits(mat, "dgCMatrix")) {
- mat <- methods::as(mat, if (ensure_symmetric) "symmetricMatrix" else "dgCMatrix")
+ if (!inherits(mat, "generalMatrix")) {
+ mat <- methods::as(mat, if (ensure_symmetric) "symmetricMatrix" else "generalMatrix")
}
if (force_binary) {
mat@x[mat@x > 0] <- 1
diff --git a/R/makeLinkslegacy.R b/R/makeLinkslegacy.R
new file mode 100644
index 00000000..27a6e45a
--- /dev/null
+++ b/R/makeLinkslegacy.R
@@ -0,0 +1,631 @@
+#' Convert Sparse Relationship Matrices to Kinship Links
+#' @inheritParams com2links
+#' @keywords internal
+
+
+com2links.legacy <- function(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ # pat_ped_matrix = NULL,
+ # mat_ped_matrix = NULL,
+ # mapa_id_file = "data_mapaID.csv",
+ write_buffer_size = 1000,
+ update_rate = 1000,
+ gc = TRUE,
+ writetodisk = TRUE,
+ verbose = FALSE,
+ legacy = FALSE,
+ outcome_name = "data",
+ drop_upper_triangular = TRUE,
+ ...) {
+ # Non-legacy mode processing
+
+ if (!legacy) {
+ # --- Input Validations and Preprocessing ---
+
+ # Ensure that at least one relationship matrix is provided.
+ if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) {
+ stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.")
+ }
+ # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(ad_ped_matrix)) {
+ ad_ped_matrix <- validate_and_convert_matrix(
+ mat = ad_ped_matrix,
+ name = "ad_ped_matrix"
+ )
+ }
+
+ # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided.
+ if (!is.null(cn_ped_matrix)) {
+ cn_ped_matrix <- validate_and_convert_matrix(
+ mat = cn_ped_matrix,
+ name = "cn_ped_matrix",
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # Validate and process mit_ped_matrix: convert and ensure binary values.
+ if (!is.null(mit_ped_matrix)) {
+ mit_ped_matrix <- validate_and_convert_matrix(
+ mat = mit_ped_matrix,
+ name = "mit_ped_matrix", force_binary = TRUE,
+ ensure_symmetric = TRUE
+ )
+ }
+
+ # --- Build IDs and Prepare Matrix Pointers ---
+
+ # Extract individual IDs from the first available matrix.
+ ids <- NULL
+
+
+
+ if (!is.null(cn_ped_matrix)) {
+ ids <- as.numeric(dimnames(cn_ped_matrix)[[1]])
+ nc <- ncol(cn_ped_matrix)
+ } else if (!is.null(ad_ped_matrix)) {
+ ids <- as.numeric(dimnames(ad_ped_matrix)[[1]])
+ nc <- ncol(ad_ped_matrix)
+ } else if (!is.null(mit_ped_matrix)) {
+ ids <- as.numeric(dimnames(mit_ped_matrix)[[1]])
+ nc <- ncol(mit_ped_matrix)
+ }
+
+ if (is.null(ids)) {
+ stop("Could not extract IDs from the provided matrices.")
+ }
+
+
+ # Construct case identifier
+ matrix_case <- paste(sort(c(
+ if (!is.null(ad_ped_matrix)) "ad" else NULL,
+ if (!is.null(mit_ped_matrix)) "mt" else NULL,
+ if (!is.null(cn_ped_matrix)) "cn" else NULL
+ )), collapse = "-")
+
+
+ # Count how many matrices are provided.
+ sum_nulls <- sum(!is.null(ad_ped_matrix),
+ !is.null(mit_ped_matrix),
+ !is.null(cn_ped_matrix),
+ na.rm = TRUE
+ )
+ if (verbose) {
+ print(matrix_case)
+ }
+
+ # Extract the internal pointers (p, i, and x slots) for each provided matrix.
+ if (!is.null(ad_ped_matrix)) {
+ ad_ped_p <- ad_ped_matrix@p + 1L
+ ad_ped_i <- ad_ped_matrix@i + 1L
+ ad_ped_x <- ad_ped_matrix@x
+ }
+ if (!is.null(mit_ped_matrix)) {
+ mt_p <- mit_ped_matrix@p + 1L
+ mt_i <- mit_ped_matrix@i + 1L
+ mt_x <- mit_ped_matrix@x
+ }
+ if (!is.null(cn_ped_matrix)) {
+ cn_p <- cn_ped_matrix@p + 1L
+ cn_i <- cn_ped_matrix@i + 1L
+ cn_x <- cn_ped_matrix@x
+ }
+
+ # --- Process Based on the Number of Provided Matrices ---
+ # --- Case: All Three Matrices Provided ---
+ if (sum_nulls == 3) {
+ # Set pointers for all three matrices.
+ newColPos1 <- ad_ped_p
+ iss1 <- ad_ped_i
+ x1 <- ad_ped_x
+
+ newColPos2 <- mt_p
+ iss2 <- mt_i
+ x2 <- mt_x
+
+ newColPos3 <- cn_p
+ iss3 <- cn_i
+ x3 <- cn_x
+
+ # Define relationship column names.
+ relNames <- c("addRel", "mitRel", "cnuRel")
+
+ # Optionally remove the original pointers to free memory.
+ if (gc == TRUE) {
+ remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x, cn_p, cn_i, cn_x)
+ }
+ if (verbose) {
+ message("All 3 matrix is present")
+ }
+
+ # File names
+ # rel_pairs_file <- paste0(outcome_name, "_dataRelatedPairs.csv")
+ # mapa_id_file <- paste0(outcome_name, "_data_mapaID.csv")
+
+ # Initialize the related pairs file with headers.
+ df_relpairs <- initialize_empty_df(relNames = relNames)
+
+ # Write the headers to the related pairs file.
+ if (writetodisk == TRUE) {
+ utils::write.table(
+ df_relpairs,
+ file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
+ )
+
+ # Prepare an empty buffer for batching writes.
+ write_buffer <- list()
+ remove(df_relpairs)
+ }
+
+ # Loop over each column (individual) in the matrix.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+
+ # Extract column indices for the 1st component
+ ncp1 <- newColPos1[j]
+ ncp1p <- newColPos1[j + 1L]
+ cond1 <- ncp1 < ncp1p
+ if (cond1) {
+ vv1 <- ncp1:(ncp1p - 1L)
+ iss1vv <- iss1[vv1]
+ }
+ # Extract indices for the 2nd component
+ ncp2 <- newColPos2[j]
+ ncp2p <- newColPos2[j + 1L]
+ cond2 <- ncp2 < ncp2p
+ if (cond2) {
+ vv2 <- ncp2:(ncp2p - 1L)
+ iss2vv <- iss2[vv2]
+ }
+
+ # Extract indices for the 3rd component
+ ncp3 <- newColPos3[j]
+ ncp3p <- newColPos3[j + 1L]
+ cond3 <- ncp3 < ncp3p
+ if (cond3) {
+ vv3 <- ncp3:(ncp3p - 1L)
+ iss3vv <- iss3[vv3]
+ }
+
+ # Create a unique, sorted set of row indices from all provided matrices.
+ u <- sort(igraph::union(igraph::union(if (cond1) {
+ iss1vv
+ }, if (cond2) {
+ iss2vv
+ }), if (cond3) {
+ iss3vv
+ }))
+
+ # If any relationships exist for this individual, build the related pairs.
+ if (cond1 || cond2 || cond3) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[relNames[1]]] <- 0
+ tds[[relNames[2]]] <- 0
+ tds[[relNames[3]]] <- 0
+
+ # Assign the relationship coefficients from each matrix.
+ if (cond1) {
+ tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
+ }
+ if (cond2) {
+ tds[u %in% iss2vv, relNames[2]] <- x2[vv2]
+ }
+ if (cond3) {
+ tds[u %in% iss3vv, relNames[3]] <- x3[vv3]
+ }
+
+ # Optionally drop upper-triangular entries.
+ if (drop_upper_triangular == TRUE) {
+ tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
+ }
+
+ # Write the batch to disk or accumulate in the data frame.
+ if (nrow(tds) > 0) {
+ if (writetodisk == TRUE) {
+ write_buffer[[length(write_buffer) + 1]] <- tds
+
+ if (length(write_buffer) >= write_buffer_size) { # Write in batches
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+ } else if (sum_nulls == 2) {
+ # --- Case: Two Matrices Provided ---
+ # Set pointers and relationship names based on which matrix is missing.
+
+ if (is.null(ad_ped_matrix)) {
+ newColPos1 <- mt_p
+ iss1 <- mt_i
+ x1 <- mt_x
+ newColPos2 <- cn_p
+ iss2 <- cn_i
+ x2 <- cn_x
+ relNames <- c("mitRel", "cnuRel")
+ if (gc == TRUE) {
+ remove(mt_p, mt_i, mt_x, cn_p, cn_i, cn_x)
+ }
+ }
+ if (is.null(mit_ped_matrix)) {
+ newColPos1 <- ad_ped_p
+ iss1 <- ad_ped_i
+ x1 <- ad_ped_x
+ newColPos2 <- cn_p
+ iss2 <- cn_i
+ x2 <- cn_x
+ relNames <- c("addRel", "cnuRel")
+ if (gc == TRUE) {
+ remove(ad_ped_p, ad_ped_i, ad_ped_x, cn_p, cn_i, cn_x)
+ }
+ }
+ if (is.null(cn_ped_matrix)) {
+ newColPos1 <- ad_ped_p
+ iss1 <- ad_ped_i
+ x1 <- ad_ped_x
+ newColPos2 <- mt_p
+ iss2 <- mt_i
+ x2 <- mt_x
+ relNames <- c("addRel", "mitRel")
+ if (gc == TRUE) {
+ remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x)
+ }
+ }
+
+ # Initialize the related pairs file with the appropriate headers.
+ df_relpairs <- initialize_empty_df(relNames = relNames)
+
+ if (writetodisk == TRUE) {
+ utils::write.table(
+ df_relpairs,
+ file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
+ )
+ # initial buffer
+ write_buffer <- list()
+ remove(df_relpairs)
+ }
+
+ # Process each column to extract relationships.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+
+ # Extract indices from the first matrix.
+ ncp1 <- newColPos1[j]
+ ncp1p <- newColPos1[j + 1L]
+ cond1 <- ncp1 < ncp1p
+ if (cond1) {
+ vv1 <- ncp1:(ncp1p - 1L)
+ iss1vv <- iss1[vv1]
+ }
+ # Extract indices from the second matrix.
+ ncp2 <- newColPos2[j]
+ ncp2p <- newColPos2[j + 1L]
+ cond2 <- ncp2 < ncp2p
+ if (cond2) {
+ vv2 <- ncp2:(ncp2p - 1L)
+ iss2vv <- iss2[vv2]
+ }
+
+ # Merge the indices from both matrices.
+ u <- sort(igraph::union(if (cond1) {
+ iss1vv
+ }, if (cond2) {
+ iss2vv
+ }))
+
+ # Create related pairs if relationships are found.
+ if (cond1 || cond2) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[relNames[1]]] <- 0
+ tds[[relNames[2]]] <- 0
+
+ if (cond1) {
+ tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
+ }
+ if (cond2) {
+ tds[u %in% iss2vv, relNames[2]] <- x2[vv2]
+ }
+ if (drop_upper_triangular == TRUE) {
+ tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
+ }
+
+ # Write the batch to disk or accumulate in the data frame.
+ if (nrow(tds) > 0) {
+ if (writetodisk == TRUE) {
+ write_buffer[[length(write_buffer) + 1]] <- tds
+
+ if (length(write_buffer) >= write_buffer_size) { # Write in batches
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+ } else if (sum_nulls == 1) {
+ # --- Case: Only One Matrix Provided ---
+ if (verbose) {
+ message("Only one matrix is present")
+ }
+ if (!is.null(ad_ped_matrix)) {
+ newColPos1 <- ad_ped_p
+ iss1 <- ad_ped_i
+ x1 <- ad_ped_x
+ relNames <- c("addRel")
+ if (gc == TRUE) {
+ remove(ad_ped_p, ad_ped_i, ad_ped_x)
+ }
+ }
+ if (!is.null(mit_ped_matrix)) {
+ newColPos1 <- mt_p
+ iss1 <- mt_i
+ x1 <- mt_x
+ relNames <- c("mitRel")
+ if (gc == TRUE) {
+ remove(mt_p, mt_i, mt_x)
+ }
+ }
+ if (!is.null(cn_ped_matrix)) {
+ newColPos1 <- cn_p
+ iss1 <- cn_i
+ x1 <- cn_x
+ relNames <- c("cnuRel")
+ if (gc == TRUE) {
+ remove(cn_p, cn_i, cn_x)
+ }
+ }
+
+ # Initialize the related pairs file.
+ df_relpairs <- initialize_empty_df(relNames = relNames)
+
+ if (writetodisk == TRUE) {
+ utils::write.table(
+ df_relpairs,
+ file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE
+ )
+
+ # initial buffer
+ write_buffer <- list()
+
+ remove(df_relpairs)
+ }
+
+ # Process each column.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+ # Extract column indices
+ ncp1 <- newColPos1[j]
+ ncp1p <- newColPos1[j + 1L]
+ cond1 <- ncp1 < ncp1p
+ if (cond1) {
+ vv1 <- ncp1:(ncp1p - 1L)
+ iss1vv <- iss1[vv1]
+ }
+
+ # Use the indices from the single matrix.
+ u <- sort(iss1vv)
+
+ if (cond1) {
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[relNames[1]]] <- 0
+
+ if (cond1) {
+ tds[u %in% iss1vv, relNames[1]] <- x1[vv1]
+ }
+ if (drop_upper_triangular == TRUE) {
+ tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle
+ }
+
+ # Write the batch to disk or accumulate in the data frame.
+ if (nrow(tds) > 0) {
+ if (writetodisk == TRUE) {
+ write_buffer[[length(write_buffer) + 1]] <- tds
+
+ if (length(write_buffer) >= write_buffer_size) { # Write in batches
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ write_buffer <- list()
+ }
+ } else {
+ df_relpairs <- rbind(df_relpairs, tds)
+ }
+ }
+ }
+ if (verbose && (j %% update_rate == 0L)) {
+ cat("Done with", j, "of", nc, "\n")
+ }
+ }
+ } else {
+ stop("No matrices provided")
+ }
+
+ # If not writing to disk, return the accumulated data frame.
+ if (writetodisk == FALSE) {
+ return(df_relpairs)
+ } else {
+ # Write any remaining buffered rows.
+ if (length(write_buffer) > 0) {
+ utils::write.table(do.call(rbind, write_buffer),
+ file = rel_pairs_file,
+ row.names = FALSE, col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+ # return(NULL)
+ }
+ } else if (legacy) {
+ # --- Legacy Mode ---
+ # In legacy mode, convert matrices to the expected symmetric formats.
+ com2links.og(
+ rel_pairs_file = rel_pairs_file,
+ ad_ped_matrix = ad_ped_matrix,
+ mit_ped_matrix = mit_ped_matrix,
+ cn_ped_matrix = cn_ped_matrix,
+ update_rate = update_rate,
+ verbose = verbose,
+ outcome_name = outcome_name
+ )
+ return(NULL)
+ }
+
+ # --- End of Legacy Mode ---
+
+ # Merge and write the parentage matrices
+ # df <- full_join(mat_ped_matrix %>% arrange(ID), pat_ped_matrix %>% arrange(ID))
+
+ # write.table(df, file = mapa_id_file, sep = ",", append = FALSE, row.names = FALSE)
+}
+
+#' Convert Pedigree Matrices to Related Pairs File (Legacy)
+#' @description
+#' This legacy function converts pedigree matrices into a related pairs file.
+#' @inheritParams com2links
+#' @keywords internal
+
+
+com2links.og <- function(
+ rel_pairs_file = "dataRelatedPairs.csv",
+ ad_ped_matrix = NULL,
+ mit_ped_matrix = mt_ped_matrix,
+ mt_ped_matrix = NULL,
+ cn_ped_matrix = NULL,
+ update_rate = 500,
+ verbose = FALSE,
+ outcome_name = "data",
+ ...) {
+ # --- Legacy Mode ---
+ if (verbose) {
+ message("Using legacy mode")
+ }
+ # In legacy mode, convert matrices to the expected symmetric formats.
+
+ # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata'))
+ # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix")
+ # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata'))
+ # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata'))
+
+ # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv')
+
+ biggestMtPed <- mit_ped_matrix
+ remove(mit_ped_matrix)
+ biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix")
+ remove(cn_ped_matrix)
+ biggestPed <- ad_ped_matrix
+ remove(ad_ped_matrix)
+ biggestMtPed@x[biggestMtPed@x > 0] <- 1
+
+ # Set the output file name.
+ if (exists("rel_pairs_file")) {
+ fname <- rel_pairs_file
+ } else {
+ fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv")
+ }
+ # Initialize the output file with headers.
+ ds <- data.frame(
+ ID1 = numeric(0), ID2 = numeric(0),
+ addRel = numeric(0),
+ mitRel = numeric(0), cnuRel = numeric(0)
+ )
+
+ utils::write.table(ds,
+ file = fname, sep = ",",
+ append = FALSE, row.names = FALSE
+ )
+
+ # Extract IDs from the common nuclear matrix.
+ ids <- as.numeric(dimnames(biggestCnPed)[[1]])
+
+ # Extract pointers from the legacy matrices.
+ newColPos1 <- biggestPed@p + 1L
+ iss1 <- biggestPed@i + 1L
+ newColPos2 <- biggestMtPed@p + 1L
+ iss2 <- biggestMtPed@i + 1L
+ newColPos3 <- biggestCnPed@p + 1L
+ iss3 <- biggestCnPed@i + 1L
+ nc <- ncol(biggestPed)
+
+ # Process each individual.
+ for (j in 1L:nc) {
+ ID2 <- ids[j]
+ ncp1 <- newColPos1[j]
+ ncp1p <- newColPos1[j + 1L]
+ cond1 <- ncp1 < ncp1p
+ if (cond1) {
+ vv1 <- ncp1:(ncp1p - 1L)
+ iss1vv <- iss1[vv1]
+ }
+ ncp2 <- newColPos2[j]
+ ncp2p <- newColPos2[j + 1L]
+ cond2 <- ncp2 < ncp2p
+ if (cond2) {
+ vv2 <- ncp2:(ncp2p - 1L)
+ iss2vv <- iss2[vv2]
+ }
+ ncp3 <- newColPos3[j]
+ ncp3p <- newColPos3[j + 1L]
+ cond3 <- ncp3 < ncp3p
+ if (cond3) {
+ vv3 <- ncp3:(ncp3p - 1L)
+ iss3vv <- iss3[vv3]
+ }
+
+ # Merge indices from all three matrices.
+ u <- sort(igraph::union(igraph::union(if (cond1) {
+ iss1vv
+ }, if (cond2) {
+ iss2vv
+ }), if (cond3) {
+ iss3vv
+ }))
+ # browser()
+ if (cond1 || cond2 || cond3) {
+ ID1 <- ids[u]
+ tds <- data.frame(
+ ID1 = ID1, ID2 = ID2,
+ addRel = 0, mitRel = 0, cnuRel = 0
+ )
+ if (cond1) {
+ tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1]
+ }
+ if (cond2) {
+ tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2]
+ }
+ if (cond3) {
+ tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3]
+ }
+ utils::write.table(tds,
+ file = fname, row.names = FALSE,
+ col.names = FALSE, append = TRUE, sep = ","
+ )
+ }
+ if (!(j %% update_rate)) {
+ cat(paste0("Done with ", j, " of ", nc, "\n"))
+ }
+ }
+ return(NULL)
+}
+
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index 51bdcb49..52cc13c1 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -7,10 +7,9 @@ test_that("com2links handles missing matrices properly", {
-
test_that("com2links rejects invalid matrix types", {
fake_matrix <- data.frame(A = c(1, 2), B = c(3, 4))
- expect_error(com2links(ad_ped_matrix = fake_matrix), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.")
+ expect_error(com2links(ad_ped_matrix = fake_matrix), "The 'ad_ped_matrix' must be a matrix or generalMatrix")
})
test_that("com2links produces correct output with a single relationship matrix (hazard dataset)", {
@@ -27,12 +26,12 @@ test_that("com2links produces correct output with a single relationship matrix (
test_that("com2links produces correct output with cn_ped_matrix", {
data(ASOIAF)
- cn_ped_matrix <- ped2mit(ASOIAF, sparse = TRUE)
+ cn_ped_matrix <- ped2cn(ASOIAF, sparse = TRUE)
result <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
expect_true(is.data.frame(result))
- expect_true(all(c("ID1", "ID2", "cnRel") %in% colnames(result)))
+ expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result)))
expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel
expect_true(all(result$cnRel >= 0)) # Relatedness values should be non-negative
})
@@ -65,7 +64,47 @@ test_that("com2links processes multiple matrices correctly (hazard dataset)", {
expect_true(all(result$cnuRel >= 0))
})
+test_that("com2links processes creates same length for cn with 3, 2, and 1 matrices are used", {
+ data(hazard)
+ ad_ped_matrix <- ped2add(hazard, sparse = TRUE)
+ mit_ped_matrix <- ped2mit(hazard, sparse = TRUE)
+ cn_ped_matrix <- ped2cn(hazard, sparse = TRUE)
+
+ result3 <- com2links(ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
+
+ expect_true(is.data.frame(result3))
+ expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result3)))
+ expect_equal(ncol(result3), 5) # Expect ID1, ID2, addRel, mitRel, and cnuRel
+ expect_true(all(result3$addRel >= 0))
+ expect_true(all(result3$mitRel %in% c(0, 1))) # Mitochondrial should be binary
+ expect_true(all(result3$cnuRel >= 0))
+
+ result2 <- com2links(ad_ped_matrix = ad_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
+ expect_true(is.data.frame(result2))
+ expect_true(all(c("ID1", "ID2", "addRel", "cnuRel") %in% colnames(result2)))
+ expect_equal(ncol(result2), 4) # Expect ID1, ID2, addRel, and cnuRel
+ expect_true(all(result2$addRel >= 0))
+ expect_true(all(result2$cnuRel >= 0))
+
+ expect_equal(result3$cnuRel,result2$cnuRel)
+
+ result1 <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
+ result1_legacy <- com2links.legacy(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE)
+ expect_true(is.data.frame(result1))
+ expect_true(is.data.frame(result1_legacy))
+ expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result1)))
+ expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result1_legacy)))
+ expect_equal(ncol(result1), 3) # Expect ID1, ID2, and cnuRel
+ expect_equal(ncol(result1_legacy), 3) # Expect ID1, ID2, and cnuRel
+ expect_true(all(result1$cnuRel >= 0))
+ expect_true(all(result1_legacy$cnuRel >= 0))
+ expect_equal(result3$cnuRel,result1$cnuRel)
+ expect_equal(result3$cnuRel,result1_legacy$cnuRel)
+ expect_equal(result2$cnuRel,result1$cnuRel)
+ expect_equal(result2$cnuRel,result1_legacy$cnuRel)
+ expect_equal(result1$cnuRel,result1_legacy$cnuRel)
+})
test_that("com2links written version matchs", {
data(hazard)
ad_ped_matrix <- ped2com(hazard, component = "additive", adjacency_method = "direct", sparse = TRUE)
@@ -236,7 +275,7 @@ test_that("com2links correctly handles missing matrices", {
"At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided."
)
- expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.")
+ expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or generalMatrix")
})
test_that("com2links correctly processes inbreeding dataset", {
@@ -281,14 +320,16 @@ test_that("com2links handles large batch writing correctly", {
sexR <- 0.5
df_fam <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR)
- ad_ped_matrix <- ped2add(df_fam, sparse = TRUE)
+ cn_ped_matrix <- ped2cn(df_fam, sparse = TRUE)
temp_file <- tempfile(fileext = ".csv")
- com2links(ad_ped_matrix = ad_ped_matrix, rel_pairs_file = temp_file, writetodisk = TRUE, verbose = TRUE)
+ com2links(cn_ped_matrix = cn_ped_matrix, rel_pairs_file = temp_file, writetodisk = TRUE, verbose = TRUE)
expect_true(file.exists(temp_file))
written_data <- read.csv(temp_file)
expect_true(nrow(written_data) > 1000) # Ensuring batch writing logic works
+ expect_true(file.remove(temp_file))
+
})
test_that("com2links garbage collection does not affect output, using two components", {
From 8039f67956e256ef2dd282eba34e4b635856246c Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Thu, 10 Apr 2025 15:16:44 -0400
Subject: [PATCH 14/35] more tests and exploring option to write out all values
---
R/makeLinks.R | 46 ++++++++++++++++++++++++------
man/com2links.Rd | 3 ++
man/com2links.legacy.Rd | 6 ++--
man/com2links.og.Rd | 2 +-
man/process_one.Rd | 45 +++++++++++++++++++++++++++++
tests/testthat/test-makeLinks.R | 12 ++++----
tests/testthat/test-plotPedigree.R | 17 +++++++++++
7 files changed, 112 insertions(+), 19 deletions(-)
create mode 100644 man/process_one.Rd
diff --git a/R/makeLinks.R b/R/makeLinks.R
index 0156717c..46e7d743 100644
--- a/R/makeLinks.R
+++ b/R/makeLinks.R
@@ -17,6 +17,7 @@
#' @param legacy Logical. If TRUE, uses the legacy branch of the function.
#' @param outcome_name Character string representing the outcome name (used in file naming).
#' @param drop_upper_triangular Logical. If TRUE, drops the upper triangular portion of the matrix.
+#' @param include_all_links_1ped Logical. If TRUE, includes all links in the output. (Default is true when only one ped is provided)
#' @param ... Additional arguments to be passed to \code{\link{com2links}}
#'
#' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk.
@@ -39,6 +40,7 @@ com2links <- function(
legacy = FALSE,
outcome_name = "data",
drop_upper_triangular = TRUE,
+ include_all_links_1ped=FALSE,
...) {
# --- Input Validations and Preprocessing ---
@@ -118,6 +120,7 @@ com2links <- function(
update_rate = update_rate,
verbose = verbose,
gc = gc,
+ include_all_links = include_all_links_1ped,
...
),
"mt" = process_one(
@@ -132,6 +135,7 @@ com2links <- function(
update_rate = update_rate,
verbose = verbose,
gc = gc,
+ include_all_links = include_all_links_1ped,
...
),
"cn" = process_one(
@@ -146,6 +150,7 @@ com2links <- function(
update_rate = update_rate,
verbose = verbose,
gc = gc,
+ include_all_links = include_all_links_1ped,
...
),
"ad-mt" = process_two(
@@ -217,13 +222,17 @@ com2links <- function(
stop("Unsupported matrix combination")
)
}
-#' Convert Sparse Relationship Matrices to Kinship Links
+#' Convert Sparse Relationship Matrices to Kinship Links for one Matrix
#' @inheritParams com2links
+#' @param include_all_links Logical. If TRUE, all links are included in the output.
#' @keywords internal
-process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) {
+process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
+ write_buffer_size, drop_upper_triangular, update_rate, verbose, gc,
+ include_all_links=TRUE, ...) {
+ if (include_all_links == FALSE) {
# Extract pointers and indices from the matrix.
newColPos <- matrix@p + 1L
iss <- matrix@i + 1L
@@ -245,7 +254,8 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
# Process each column in the matrix.
for (j in 1L:nc) {
- ID2 <- ids[j]
+
+ ID2 <- ids[j]
# Extract column indices
ncp <- newColPos[j]
@@ -261,10 +271,10 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
# If any relationships exist for this individual, build the related pairs.
if (cond) {
- ID1 <- ids[u]
- tds <- data.frame(ID1 = ID1, ID2 = ID2)
- tds[[rel_name]] <- 0
-
+ # Create a data frame with unique pairs.
+ ID1 <- ids[u]
+ tds <- data.frame(ID1 = ID1, ID2 = ID2)
+ tds[[rel_name]] <- 0
if (cond) {
tds[u %in% issvv, rel_name] <- x[vv]
}
@@ -307,6 +317,26 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk,
}
if (gc == TRUE) {
remove(newColPos, iss, x)
+ }
+ }else{
+ matrix2= matrix(rep(1,length(ids)^2),
+ nrow = length(ids),
+ dimnames = list(ids, ids))
+ process_two(matrix2=matrix, name2=rel_name,
+ matrix1=methods::as(matrix2,"CsparseMatrix"),
+ name1="phantom",
+ ids=ids,
+ nc = nc,
+ rel_pairs_file = rel_pairs_file,
+ writetodisk = writetodisk,
+ write_buffer_size = write_buffer_size,
+ drop_upper_triangular = drop_upper_triangular,
+ update_rate = update_rate,
+ verbose = verbose,
+ gc = gc)
+
+
+
}
}
@@ -524,7 +554,7 @@ process_two <- function(
#' @return The validated and converted matrix.
validate_and_convert_matrix <- function(mat, name, ensure_symmetric = FALSE, force_binary = FALSE) {
if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix","generalMatrix",
- "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix"))) {
+ "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix",'CsparseMatrix'))) {
stop(paste0("The '", name, "' must be a matrix or generalMatrix"))
}
if (!inherits(mat, "generalMatrix")) {
diff --git a/man/com2links.Rd b/man/com2links.Rd
index 9dae2f09..dc846645 100644
--- a/man/com2links.Rd
+++ b/man/com2links.Rd
@@ -18,6 +18,7 @@ com2links(
legacy = FALSE,
outcome_name = "data",
drop_upper_triangular = TRUE,
+ include_all_links_1ped = FALSE,
...
)
}
@@ -48,6 +49,8 @@ com2links(
\item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.}
+\item{include_all_links_1ped}{Logical. If TRUE, includes all links in the output. (Default is true when only one ped is provided)}
+
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
\value{
diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd
index bf4bc6e4..4467dc85 100644
--- a/man/com2links.legacy.Rd
+++ b/man/com2links.legacy.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/makeLinks.R
+% Please edit documentation in R/makeLinkslegacy.R
\name{com2links.legacy}
\alias{com2links.legacy}
\title{Convert Sparse Relationship Matrices to Kinship Links}
@@ -51,8 +51,6 @@ com2links.legacy(
\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
}
\description{
-This function processes one or more sparse relationship components (additive, mitochondrial,
-and common nuclear) and converts them into kinship link pairs. The resulting related pairs are
-either returned as a data frame or written to disk in CSV format.
+Convert Sparse Relationship Matrices to Kinship Links
}
\keyword{internal}
diff --git a/man/com2links.og.Rd b/man/com2links.og.Rd
index b6c3d71b..e51c53cd 100644
--- a/man/com2links.og.Rd
+++ b/man/com2links.og.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/makeLinks.R
+% Please edit documentation in R/makeLinkslegacy.R
\name{com2links.og}
\alias{com2links.og}
\title{Convert Pedigree Matrices to Related Pairs File (Legacy)}
diff --git a/man/process_one.Rd b/man/process_one.Rd
new file mode 100644
index 00000000..0d57de2b
--- /dev/null
+++ b/man/process_one.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/makeLinks.R
+\name{process_one}
+\alias{process_one}
+\title{Convert Sparse Relationship Matrices to Kinship Links for one Matrix}
+\usage{
+process_one(
+ matrix,
+ rel_name,
+ ids,
+ nc,
+ rel_pairs_file,
+ writetodisk,
+ write_buffer_size,
+ drop_upper_triangular,
+ update_rate,
+ verbose,
+ gc,
+ include_all_links = TRUE,
+ ...
+)
+}
+\arguments{
+\item{rel_pairs_file}{File path to write related pairs to (CSV format).}
+
+\item{writetodisk}{Logical. If TRUE, writes the related pairs to disk; if FALSE, returns a data frame.}
+
+\item{write_buffer_size}{Number of related pairs to write to disk at a time.}
+
+\item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.}
+
+\item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.}
+
+\item{verbose}{Logical. If TRUE, prints progress messages.}
+
+\item{gc}{Logical. If TRUE, performs garbage collection via \code{\link{gc}} to free memory.}
+
+\item{include_all_links}{Logical. If TRUE, all links are included in the output.}
+
+\item{...}{Additional arguments to be passed to \code{\link{com2links}}}
+}
+\description{
+Convert Sparse Relationship Matrices to Kinship Links for one Matrix
+}
+\keyword{internal}
diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R
index 52cc13c1..e23b2390 100644
--- a/tests/testthat/test-makeLinks.R
+++ b/tests/testthat/test-makeLinks.R
@@ -98,11 +98,11 @@ test_that("com2links processes creates same length for cn with 3, 2, and 1 matri
expect_equal(ncol(result1_legacy), 3) # Expect ID1, ID2, and cnuRel
expect_true(all(result1$cnuRel >= 0))
expect_true(all(result1_legacy$cnuRel >= 0))
- expect_equal(result3$cnuRel,result1$cnuRel)
- expect_equal(result3$cnuRel,result1_legacy$cnuRel)
- expect_equal(result2$cnuRel,result1$cnuRel)
- expect_equal(result2$cnuRel,result1_legacy$cnuRel)
- expect_equal(result1$cnuRel,result1_legacy$cnuRel)
+ expect_equal(result3$cnuRel[result3$cnuRel==1],result1$cnuRel[result1$cnuRel==1])
+ expect_equal(result3$cnuRel[result3$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1])
+ expect_equal(result2$cnuRel[result2$cnuRel==1],result1$cnuRel[result1$cnuRel==1])
+ expect_equal(result2$cnuRel[result2$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1])
+ expect_equal(result1$cnuRel[result1$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1])
})
test_that("com2links written version matchs", {
@@ -327,7 +327,7 @@ test_that("com2links handles large batch writing correctly", {
expect_true(file.exists(temp_file))
written_data <- read.csv(temp_file)
- expect_true(nrow(written_data) > 1000) # Ensuring batch writing logic works
+ expect_true(nrow(written_data) == 155) # Ensuring batch writing logic works
expect_true(file.remove(temp_file))
})
diff --git a/tests/testthat/test-plotPedigree.R b/tests/testthat/test-plotPedigree.R
index 4944bcdb..2c5199d7 100644
--- a/tests/testthat/test-plotPedigree.R
+++ b/tests/testthat/test-plotPedigree.R
@@ -32,3 +32,20 @@ test_that("pedigree plots correctly with affected variables", {
# file.remove("Rplots.pdf")
})
# file.remove("Rplots.pdf")
+
+test_that("pedigree errs when affected variables named", {
+data(inbreeding)
+
+ expect_error(plotPedigree(data, verbose = TRUE, affected = "affected"))
+
+
+})
+
+
+test_that("pedigree plots multiple families", {
+ data(inbreeding)
+
+ expect_output(plotPedigree(inbreeding, verbose = TRUE))
+
+
+})
From 728d56030249f8497eecb66c38a95df7d63c3261 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Fri, 11 Apr 2025 09:20:00 -0400
Subject: [PATCH 15/35] additional aliases
---
NAMESPACE | 4 ++++
NEWS.md | 1 +
R/calculateFamilySize.R | 23 +++++++++++++------
R/helpGeneric.R | 14 +++++++----
R/helpPedigree.R | 6 ++++-
R/insertEven.R | 6 ++++-
R/simulatePedigree.R | 14 ++++-------
R/tweakPedigree.R | 2 +-
man/assignCoupleIds.Rd | 5 +++-
man/{allGens.Rd => calcAllGens.Rd} | 5 +++-
man/{famSizeCal.Rd => calcFamilySize.Rd} | 5 +++-
...{sizeAllGens.Rd => calcFamilySizeByGen.Rd} | 5 +++-
man/{evenInsert.Rd => insertEven.Rd} | 5 +++-
man/{nullToNA.Rd => null2NA.Rd} | 5 +++-
man/{try_na.Rd => tryNA.Rd} | 5 +++-
15 files changed, 75 insertions(+), 30 deletions(-)
rename man/{allGens.Rd => calcAllGens.Rd} (91%)
rename man/{famSizeCal.Rd => calcFamilySize.Rd} (90%)
rename man/{sizeAllGens.Rd => calcFamilySizeByGen.Rd} (86%)
rename man/{evenInsert.Rd => insertEven.Rd} (93%)
rename man/{nullToNA.Rd => null2NA.Rd} (86%)
rename man/{try_na.Rd => tryNA.Rd} (89%)
diff --git a/NAMESPACE b/NAMESPACE
index 47496f84..972282d4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,8 @@
export(SimPed)
export(allGens)
+export(calcAllGens)
+export(calcFamilySize)
export(calculateRelatedness)
export(checkIDs)
export(checkParentIDs)
@@ -17,6 +19,7 @@ export(famSizeCal)
export(fitComponentModel)
export(identifyComponentModel)
export(inferRelatedness)
+export(insertEven)
export(makeInbreeding)
export(makeTwins)
export(parseTree)
@@ -38,6 +41,7 @@ export(relatedness)
export(repairSex)
export(resample)
export(simulatePedigree)
+export(sizeAllGens)
export(summarizeFamilies)
export(summarizeMatrilines)
export(summarizePatrilines)
diff --git a/NEWS.md b/NEWS.md
index 94602bd7..fd941969 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,7 @@
* reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions
* allow verbose argument to be passed to standardizeColnames
* list SimPed and related_coef as aliases for functions
+* harmonizing function names like calcFamilySize from famSizeCal
# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
diff --git a/R/calculateFamilySize.R b/R/calculateFamilySize.R
index 51365d85..06cc3f16 100644
--- a/R/calculateFamilySize.R
+++ b/R/calculateFamilySize.R
@@ -5,7 +5,7 @@
#' @param marR Mating rate (numeric value ranging from 0 to 1).
#' @return Returns a vector containing the number of individuals in every generation.
#' @export
-allGens <- function(kpc, Ngen, marR) {
+calcAllGens <- function(kpc, Ngen, marR) {
# Check if the number of generations is valid
if (Ngen < 1) {
stop("The number of generations should be an integer greater or equal than 1")
@@ -23,14 +23,16 @@ allGens <- function(kpc, Ngen, marR) {
}
return(allGens)
}
-
+#' @rdname calcAllGens
+#' @export
+allGens <- calcAllGens
#' sizeAllGens
#' An internal supporting function for \code{simulatePedigree}.
-#' @inheritParams allGens
+#' @inheritParams calcAllGens
#' @return Returns a vector including the number of individuals in every generation.
-sizeAllGens <- function(kpc, Ngen, marR) {
+calcFamilySizeByGen <- function(kpc, Ngen, marR) {
Nmid <- Ngen - 2
midGens <- numeric(length = Nmid)
@@ -46,14 +48,16 @@ sizeAllGens <- function(kpc, Ngen, marR) {
# print(allGens)
return(allGens)
}
-
+#' @rdname calcFamilySizeByGen
+#' @export
+sizeAllGens <- calcFamilySizeByGen
#' famSizeCal
#' A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree}
-#' @inheritParams allGens
+#' @inheritParams calcAllGens
#' @return Returns a numeric value indicating the total pedigree size.
#' @export
-famSizeCal <- function(kpc, Ngen, marR) {
+calcFamilySize <- function(kpc, Ngen, marR) {
if (Ngen < 1) {
stop("The number of generations should be an integer greater than or equal to 1")
} else if (Ngen == 1) {
@@ -71,3 +75,8 @@ famSizeCal <- function(kpc, Ngen, marR) {
}
return(size)
}
+
+#' @rdname calcFamilySize
+#' @export
+#'
+famSizeCal <- calcFamilySize
diff --git a/R/helpGeneric.R b/R/helpGeneric.R
index 8dae2b2c..4e1d8ca9 100644
--- a/R/helpGeneric.R
+++ b/R/helpGeneric.R
@@ -24,10 +24,9 @@ rmvn <- function(n, sigma) {
#' @param x vector of any length
#' @return replaces null values in a vector to NA
#'
-nullToNA <- function(x) {
+null2NA <- function(x) {
if (length(x) == 0) {
x <- NA
-
# Handle case when x is a list
} else if (is.list(x)) {
for (i in seq_along(x)) {
@@ -39,6 +38,9 @@ nullToNA <- function(x) {
return(x)
}
+#' @rdname null2NA
+#'
+nullToNA <- null2NA
#' modified tryCatch function
#'
@@ -46,10 +48,14 @@ nullToNA <- function(x) {
#' @keywords internal
#' @return Fuses the nullToNA function with efunc
#'
-try_na <- function(x) {
- nullToNA(tryCatch(x, error = efunc))
+tryNA <- function(x) {
+ null2NA(tryCatch(x, error = efunc))
}
+#' @rdname tryNA
+#' @keywords internal
+#'
+try_na <- tryNA
#' Compute the null space of a matrix
#'
#' @param M a matrix of which the null space is desired
diff --git a/R/helpPedigree.R b/R/helpPedigree.R
index 7b6bb4be..3d054d4b 100644
--- a/R/helpPedigree.R
+++ b/R/helpPedigree.R
@@ -60,7 +60,7 @@ determineSex <- function(idGen, sexR) {
#'
#' @param df_Ngen The dataframe for the current generation, including columns for individual IDs and spouse IDs.
#' @return The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier.
-assignCoupleIds <- function(df_Ngen) {
+assignCoupleIDs <- function(df_Ngen) {
df_Ngen$coupleId <- NA_character_ # Initialize the coupleId column with NAs
usedCoupleIds <- character() # Initialize an empty character vector to track used IDs
@@ -86,6 +86,10 @@ assignCoupleIds <- function(df_Ngen) {
return(df_Ngen)
}
+
+#' @rdname assignCoupleIDs
+assignCoupleIds <- assignCoupleIDs
+
#' Generate or Adjust Number of Kids per Couple Based on Mating Rate
#'
#' This function generates or adjusts the number of kids per couple in a generation
diff --git a/R/insertEven.R b/R/insertEven.R
index ba2d55e9..573bd8c9 100644
--- a/R/insertEven.R
+++ b/R/insertEven.R
@@ -12,7 +12,7 @@
#' @export
#' @seealso \code{\link{SimPed}} for the main function that uses this supporting function.
-evenInsert <- function(m, n, verbose = FALSE) {
+insertEven <- function(m, n, verbose = FALSE) {
if (length(m) > length(n)) {
temp <- m
m <- n
@@ -36,3 +36,7 @@ evenInsert <- function(m, n, verbose = FALSE) {
return(vec)
}
+
+#' @rdname insertEven
+#' @export
+evenInsert <- insertEven
diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
index 29407af8..8fc88915 100644
--- a/R/simulatePedigree.R
+++ b/R/simulatePedigree.R
@@ -24,7 +24,6 @@ buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) {
df_Ngen$sex <- determineSex(idGen = idGen, sexR = sexR)
-
# print(paste("tiger",i))
# The first generation
if (i == 1) {
@@ -181,11 +180,9 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR,
# count the number of couples in the i th gen
countCouple <- (nrow(df_Ngen) - sum(is.na(df_Ngen$spID))) * .5
-
# Now, assign couple IDs for the current generation
df_Ngen <- assignCoupleIds(df_Ngen)
-
# get the number of linked female and male children after excluding the single children
# get a vector of single person id in the ith generation
IdSingle <- df_Ngen$id[is.na(df_Ngen$spID)]
@@ -194,9 +191,11 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR,
SingleM <- sum(df_Ngen$sex == "M" & is.na(df_Ngen$spID))
CoupleM <- N_LinkedMale - SingleM
- df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen, i = i, Ngen = Ngen, sizeGens = sizeGens, CoupleF = CoupleF)
-
-
+ df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen,
+ i = i,
+ Ngen = Ngen,
+ sizeGens = sizeGens,
+ CoupleF = CoupleF)
if (verbose) {
print(
"Step 2.2: mark a group of potential parents in the i-1 th generation"
@@ -251,7 +250,6 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR,
# generate link kids to the couples
random_numbers <- adjustKidsPerCouple(nMates = sum(df_Ngen$ifparent) / 2, kpc = kpc, rd_kpc = rd_kpc)
-
# cat("final random numbers",random_numbers, "\n")
# cat("mean",sum(random_numbers)/length(random_numbers), "\n")
# create two vectors for maId and paId; replicate the ids to match the same length as IdOfp
@@ -391,8 +389,6 @@ simulatePedigree <- function(kpc = 3,
sizeGens = sizeGens, verbose = verbose, marR = marR, sexR = sexR, kpc = kpc, rd_kpc = rd_kpc
)
-
-
df_Fam <- df_Fam[, 1:7]
df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ]
colnames(df_Fam)[c(2, 4, 5)] <- c("ID", "dadID", "momID")
diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R
index edefaf99..d617851b 100644
--- a/R/tweakPedigree.R
+++ b/R/tweakPedigree.R
@@ -255,7 +255,7 @@ dropLink <- function(ped,
if (!is.na(ID_drop)) {
ped[ped$ID %in% ID_drop, c("dadID", "momID")] <- NA_integer_
} else {
- warning("No individual is dropped from his/her parents.")
+ warning("No individual is dropped from their parents.")
}
} else {
ped[ped$ID == ID_drop, c("dadID", "momID")] <- NA_integer_
diff --git a/man/assignCoupleIds.Rd b/man/assignCoupleIds.Rd
index 7f379f64..95165581 100644
--- a/man/assignCoupleIds.Rd
+++ b/man/assignCoupleIds.Rd
@@ -1,9 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpPedigree.R
-\name{assignCoupleIds}
+\name{assignCoupleIDs}
+\alias{assignCoupleIDs}
\alias{assignCoupleIds}
\title{Assign Couple IDs}
\usage{
+assignCoupleIDs(df_Ngen)
+
assignCoupleIds(df_Ngen)
}
\arguments{
diff --git a/man/allGens.Rd b/man/calcAllGens.Rd
similarity index 91%
rename from man/allGens.Rd
rename to man/calcAllGens.Rd
index 6bc6d9e0..66a89c25 100644
--- a/man/allGens.Rd
+++ b/man/calcAllGens.Rd
@@ -1,10 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculateFamilySize.R
-\name{allGens}
+\name{calcAllGens}
+\alias{calcAllGens}
\alias{allGens}
\title{allGens
A function to calculate the number of individuals in each generation. This is a supporting function for \code{simulatePedigree}.}
\usage{
+calcAllGens(kpc, Ngen, marR)
+
allGens(kpc, Ngen, marR)
}
\arguments{
diff --git a/man/famSizeCal.Rd b/man/calcFamilySize.Rd
similarity index 90%
rename from man/famSizeCal.Rd
rename to man/calcFamilySize.Rd
index e51f95ee..a0128d0c 100644
--- a/man/famSizeCal.Rd
+++ b/man/calcFamilySize.Rd
@@ -1,10 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculateFamilySize.R
-\name{famSizeCal}
+\name{calcFamilySize}
+\alias{calcFamilySize}
\alias{famSizeCal}
\title{famSizeCal
A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree}}
\usage{
+calcFamilySize(kpc, Ngen, marR)
+
famSizeCal(kpc, Ngen, marR)
}
\arguments{
diff --git a/man/sizeAllGens.Rd b/man/calcFamilySizeByGen.Rd
similarity index 86%
rename from man/sizeAllGens.Rd
rename to man/calcFamilySizeByGen.Rd
index e477c19e..ae3e5e88 100644
--- a/man/sizeAllGens.Rd
+++ b/man/calcFamilySizeByGen.Rd
@@ -1,10 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculateFamilySize.R
-\name{sizeAllGens}
+\name{calcFamilySizeByGen}
+\alias{calcFamilySizeByGen}
\alias{sizeAllGens}
\title{sizeAllGens
An internal supporting function for \code{simulatePedigree}.}
\usage{
+calcFamilySizeByGen(kpc, Ngen, marR)
+
sizeAllGens(kpc, Ngen, marR)
}
\arguments{
diff --git a/man/evenInsert.Rd b/man/insertEven.Rd
similarity index 93%
rename from man/evenInsert.Rd
rename to man/insertEven.Rd
index 2dae39c1..7fc08138 100644
--- a/man/evenInsert.Rd
+++ b/man/insertEven.Rd
@@ -1,10 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/insertEven.R
-\name{evenInsert}
+\name{insertEven}
+\alias{insertEven}
\alias{evenInsert}
\title{evenInsert
A function to insert m elements evenly into a length n vector.}
\usage{
+insertEven(m, n, verbose = FALSE)
+
evenInsert(m, n, verbose = FALSE)
}
\arguments{
diff --git a/man/nullToNA.Rd b/man/null2NA.Rd
similarity index 86%
rename from man/nullToNA.Rd
rename to man/null2NA.Rd
index 4bccb4b3..cb6d7571 100644
--- a/man/nullToNA.Rd
+++ b/man/null2NA.Rd
@@ -1,9 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpGeneric.R
-\name{nullToNA}
+\name{null2NA}
+\alias{null2NA}
\alias{nullToNA}
\title{nullToNA}
\usage{
+null2NA(x)
+
nullToNA(x)
}
\arguments{
diff --git a/man/try_na.Rd b/man/tryNA.Rd
similarity index 89%
rename from man/try_na.Rd
rename to man/tryNA.Rd
index 0f8fddeb..388fa600 100644
--- a/man/try_na.Rd
+++ b/man/tryNA.Rd
@@ -1,9 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpGeneric.R
-\name{try_na}
+\name{tryNA}
+\alias{tryNA}
\alias{try_na}
\title{modified tryCatch function}
\usage{
+tryNA(x)
+
try_na(x)
}
\arguments{
From c48b8aa7027e194589603864a2badac539712b4c Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Fri, 11 Apr 2025 09:24:06 -0400
Subject: [PATCH 16/35] remove package messaging
---
.gitignore | 1 +
vignettes/ASOIAF.Rmd | 5 +-
vignettes/ASOIAF.html | 127 +++++++++++++++++-------------------------
3 files changed, 55 insertions(+), 78 deletions(-)
diff --git a/.gitignore b/.gitignore
index c2b69572..8faabc57 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ ASOIAF.ged
.vscode/launch.json
dataRelatedPairs_new2.csv
+data-raw/ASOIAF_040725.ged
diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd
index 0491b015..ddc07ebe 100644
--- a/vignettes/ASOIAF.Rmd
+++ b/vignettes/ASOIAF.Rmd
@@ -15,11 +15,12 @@ Just how related are Jon Snow and Daenerys Targaryen? This vignette walks throug
We begin by loading the necessary packages and accessing the built-in `ASOIAF` pedigree dataset included with `BGmisc`.
-```{r}
+```{r echo=TRUE, message=FALSE, warning=FALSE}
library(BGmisc)
library(tidyverse)
data(ASOIAF)
```
+
The ASOIAF data contains character IDs, family identifiers, and parent-child links extracted from A Song of Ice and Fire lore.
```{r}
@@ -127,7 +128,7 @@ This code creates new IDs for individuals with one known parent and a missing ot
## Visualize the Pedigree
-```{r}
+```{r, message=FALSE, warning=FALSE}
#fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA)
diff --git a/vignettes/ASOIAF.html b/vignettes/ASOIAF.html
index e6f72f60..709281cc 100644
--- a/vignettes/ASOIAF.html
+++ b/vignettes/ASOIAF.html
@@ -355,36 +355,11 @@ Load Packages and Data
ASOIAF pedigree dataset included with
BGmisc.
library(BGmisc)
-library(tidyverse)
-## ── Attaching core tidyverse packages ────── tidyverse 2.0.0 ──
-## ✔ dplyr 1.1.4 ✔ readr 2.1.5
-## ✔ forcats 1.0.0 ✔ stringr 1.5.1
-## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
-## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
-## ✔ purrr 1.0.4
-## ── Conflicts ──────────────────────── tidyverse_conflicts() ──
-## ✖ dplyr::between() masks BGmisc::between()
-## ✖ dplyr::filter() masks stats::filter()
-## ✖ dplyr::first() masks BGmisc::first()
-## ✖ lubridate::hour() masks BGmisc::hour()
-## ✖ lubridate::isoweek() masks BGmisc::isoweek()
-## ✖ dplyr::lag() masks stats::lag()
-## ✖ dplyr::last() masks BGmisc::last()
-## ✖ lubridate::mday() masks BGmisc::mday()
-## ✖ lubridate::minute() masks BGmisc::minute()
-## ✖ lubridate::month() masks BGmisc::month()
-## ✖ lubridate::quarter() masks BGmisc::quarter()
-## ✖ lubridate::second() masks BGmisc::second()
-## ✖ purrr::transpose() masks BGmisc::transpose()
-## ✖ lubridate::wday() masks BGmisc::wday()
-## ✖ lubridate::week() masks BGmisc::week()
-## ✖ lubridate::yday() masks BGmisc::yday()
-## ✖ lubridate::year() masks BGmisc::year()
-## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
-
+library(tidyverse)
+data(ASOIAF)
The ASOIAF data contains character IDs, family identifiers, and
parent-child links extracted from A Song of Ice and Fire lore.
-
+
## id famID momID dadID name sex
## 1 1 1 NA NA Walder Frey M
## 2 2 1 NA NA Perra Royce F
@@ -398,11 +373,11 @@ Prepare and Validate Sex Codes
We use checkSex() to ensure that all individuals have
valid sex codes, repairing as needed. This is important for correct
pedigree plotting and downstream calculations.
-df_got <- checkSex(ASOIAF,
- code_male = 1,
- code_female = 0,
- verbose = FALSE, repair = TRUE
-)
+df_got <- checkSex(ASOIAF,
+ code_male = 1,
+ code_female = 0,
+ verbose = FALSE, repair = TRUE
+)
Locate Jon and Daenerys
@@ -451,22 +426,22 @@
Locate Jon and Daenerys
the
df_links dataframe where either ID1 or ID2 corresponds
to Jon Snow, and then filter again to find the row where the other ID
corresponds to Daenerys Targaryen.
-
# Find the IDs of Jon Snow and Daenerys Targaryen
-
-jon_id <- df_got %>%
- filter(name == "Jon Snow") %>%
- pull(ID)
-
-dany_id <- df_got %>%
- filter(name == "Daenerys Targaryen") %>%
- pull(ID)
+
# Find the IDs of Jon Snow and Daenerys Targaryen
+
+jon_id <- df_got %>%
+ filter(name == "Jon Snow") %>%
+ pull(ID)
+
+dany_id <- df_got %>%
+ filter(name == "Daenerys Targaryen") %>%
+ pull(ID)
We then filter the pairwise table to retrieve the row containing
their relationship.
-
jon_dany_row <- df_links %>%
- filter(ID1 == jon_id | ID2 == jon_id) %>%
- filter(ID1 %in% dany_id| ID2 %in% dany_id)
-
-jon_dany_row
+
jon_dany_row <- df_links %>%
+ filter(ID1 == jon_id | ID2 == jon_id) %>%
+ filter(ID1 %in% dany_id| ID2 %in% dany_id)
+
+jon_dany_row
## ID1 ID2 addRel cnuRel
## 1 206 211 0.31274414 0
## 2 211 304 0.01953125 0
@@ -481,14 +456,14 @@
Plotting the Pedigree with incomplete parental information
To facilitate plotting, we check for individuals with one known
parent but a missing other. For those cases, we assign a placeholder ID
to the missing parent.
-
df_repaired <- checkParentIDs(df_got,addphantoms=TRUE,
- repair=TRUE,
- parentswithoutrow=FALSE,
- repairsex=FALSE
- ) %>% mutate(fam=1,
- affected = case_when(ID %in% c(jon_id,dany_id) ~ 1,
- TRUE ~ 0)
- )
+
df_repaired <- checkParentIDs(df_got,addphantoms=TRUE,
+ repair=TRUE,
+ parentswithoutrow=FALSE,
+ repairsex=FALSE
+ ) %>% mutate(fam=1,
+ affected = case_when(ID %in% c(jon_id,dany_id) ~ 1,
+ TRUE ~ 0)
+ )
## REPAIR IN EARLY ALPHA
This code creates new IDs for individuals with one known parent and a
missing other. It checks if either momID or
@@ -498,9 +473,9 @@
Plotting the Pedigree with incomplete parental information
Visualize the Pedigree
-
#fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA)
-
-plotPedigree(df_repaired,affected=df_repaired$affected,verbose=FALSE)
+
#fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA)
+
+plotPedigree(df_repaired,affected=df_repaired$affected,verbose=FALSE)

## Did not plot the following people: 85 88 125 142 228 229 258 259 274 275 305 336 357 381 388 405 409 418 420 424 428 451 487
## named list()
From 9641a042e25ae46be36446baaddf076599deec83 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Fri, 11 Apr 2025 09:28:45 -0400
Subject: [PATCH 17/35] Delete assignCoupleIds.Rd
---
man/assignCoupleIds.Rd | 21 ---------------------
1 file changed, 21 deletions(-)
delete mode 100644 man/assignCoupleIds.Rd
diff --git a/man/assignCoupleIds.Rd b/man/assignCoupleIds.Rd
deleted file mode 100644
index 95165581..00000000
--- a/man/assignCoupleIds.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helpPedigree.R
-\name{assignCoupleIDs}
-\alias{assignCoupleIDs}
-\alias{assignCoupleIds}
-\title{Assign Couple IDs}
-\usage{
-assignCoupleIDs(df_Ngen)
-
-assignCoupleIds(df_Ngen)
-}
-\arguments{
-\item{df_Ngen}{The dataframe for the current generation, including columns for individual IDs and spouse IDs.}
-}
-\value{
-The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier.
-}
-\description{
-This subfunction assigns a unique couple ID to each mated pair in the generation.
-Unmated individuals are assigned NA for their couple ID.
-}
From 9525061de57fe2111b9cb6ae290bbc7d64a28ccf Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Fri, 11 Apr 2025 17:48:40 -0400
Subject: [PATCH 18/35] allow NAs to behave
---
R/checkParents.R | 27 ++++++++++++++++++++++-----
R/cleanPedigree.R | 4 ++--
2 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/R/checkParents.R b/R/checkParents.R
index b80669bf..7d142af1 100644
--- a/R/checkParents.R
+++ b/R/checkParents.R
@@ -145,7 +145,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
# Are any parents in both momID and dadID?
momdad <- intersect(ped$dadID, ped$momID)
- if (!is.na(momdad) && length(momdad) > 0) {
+ if (length(momdad) > 0&& !is.na(momdad)) {
validation_results$parents_in_both <- momdad
if (verbose) {
cat(paste(
@@ -185,21 +185,38 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
- if (!is.na(validation_results$female_var)) {
+ if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)){
corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]]
ped$sex[mom_indices[!is.na(mom_indices)]] <- validation_results$female_var
changes$corrected_mom_sex <- corrected_moms
if (verbose && length(corrected_moms) > 0) {
cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n")
}
+ } else {
+ corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]]
+ ped$sex[mom_indices[!is.na(mom_indices)]] <- 0
+
+ changes$corrected_mom_sex <- corrected_moms
+ if (verbose && length(corrected_moms) > 0) {
+ cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n")
+ }
+
}
- if (!is.na(validation_results$male_var)) {
+ if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)){
corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]]
ped$sex[dad_indices[!is.na(dad_indices)]] <- validation_results$male_var
changes$corrected_dad_sex <- corrected_dads
if (verbose && length(corrected_dads) > 0) {
cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n")
}
+ } else {
+ corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]]
+ ped$sex[dad_indices[!is.na(dad_indices)]] <- 1
+ changes$corrected_dad_sex <- corrected_dads
+ if (verbose && length(corrected_dads) > 0) {
+ cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n")
+ }
+
}
}
}
@@ -218,7 +235,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
new_entry$ID <- new_id
new_entry$dadID <- NA
new_entry$momID <- NA
- new_entry$sex <- validation_results$male_var
+ new_entry$sex <- if(length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1
new_entries <- rbind(new_entries, new_entry)
}
@@ -231,7 +248,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
new_entry$ID <- new_id
new_entry$dadID <- NA
new_entry$momID <- NA
- new_entry$sex <- validation_results$female_var
+ new_entry$sex <- if(length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0
new_entries <- rbind(new_entries, new_entry)
}
diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R
index b03ca270..737b558c 100644
--- a/R/cleanPedigree.R
+++ b/R/cleanPedigree.R
@@ -16,9 +16,9 @@ standardizeColnames <- function(df, verbose = FALSE) {
"fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
"ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
"gen" = "^(?:gen(?:s|eration)?)",
- "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*)",
+ "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
"patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
- "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*)",
+ "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
"matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
"spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
"twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
From c18dbd758587b62d5a6af29422cd57795006ce95 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Fri, 11 Apr 2025 17:48:40 -0400
Subject: [PATCH 19/35] allow NAs to behave
---
R/checkParents.R | 27 ++++++++++++++++++++++-----
R/cleanPedigree.R | 4 ++--
2 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/R/checkParents.R b/R/checkParents.R
index b80669bf..7d142af1 100644
--- a/R/checkParents.R
+++ b/R/checkParents.R
@@ -145,7 +145,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
# Are any parents in both momID and dadID?
momdad <- intersect(ped$dadID, ped$momID)
- if (!is.na(momdad) && length(momdad) > 0) {
+ if (length(momdad) > 0&& !is.na(momdad)) {
validation_results$parents_in_both <- momdad
if (verbose) {
cat(paste(
@@ -185,21 +185,38 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
- if (!is.na(validation_results$female_var)) {
+ if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)){
corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]]
ped$sex[mom_indices[!is.na(mom_indices)]] <- validation_results$female_var
changes$corrected_mom_sex <- corrected_moms
if (verbose && length(corrected_moms) > 0) {
cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n")
}
+ } else {
+ corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]]
+ ped$sex[mom_indices[!is.na(mom_indices)]] <- 0
+
+ changes$corrected_mom_sex <- corrected_moms
+ if (verbose && length(corrected_moms) > 0) {
+ cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n")
+ }
+
}
- if (!is.na(validation_results$male_var)) {
+ if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)){
corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]]
ped$sex[dad_indices[!is.na(dad_indices)]] <- validation_results$male_var
changes$corrected_dad_sex <- corrected_dads
if (verbose && length(corrected_dads) > 0) {
cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n")
}
+ } else {
+ corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]]
+ ped$sex[dad_indices[!is.na(dad_indices)]] <- 1
+ changes$corrected_dad_sex <- corrected_dads
+ if (verbose && length(corrected_dads) > 0) {
+ cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n")
+ }
+
}
}
}
@@ -218,7 +235,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
new_entry$ID <- new_id
new_entry$dadID <- NA
new_entry$momID <- NA
- new_entry$sex <- validation_results$male_var
+ new_entry$sex <- if(length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1
new_entries <- rbind(new_entries, new_entry)
}
@@ -231,7 +248,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
new_entry$ID <- new_id
new_entry$dadID <- NA
new_entry$momID <- NA
- new_entry$sex <- validation_results$female_var
+ new_entry$sex <- if(length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0
new_entries <- rbind(new_entries, new_entry)
}
diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R
index b03ca270..737b558c 100644
--- a/R/cleanPedigree.R
+++ b/R/cleanPedigree.R
@@ -16,9 +16,9 @@ standardizeColnames <- function(df, verbose = FALSE) {
"fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
"ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
"gen" = "^(?:gen(?:s|eration)?)",
- "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*)",
+ "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
"patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
- "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*)",
+ "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
"matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
"spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
"twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
From 31f9a42777d5f2b72f4fd0b4a9422c9f92153c2f Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Sat, 12 Apr 2025 16:21:24 -0400
Subject: [PATCH 20/35] nicer plots
---
man/assignCoupleIDs.Rd | 21 +++
vignettes/partial.Rmd | 143 +++++++++++-------
vignettes/partial.html | 330 ++++++++++++++++++++++++++++-------------
3 files changed, 341 insertions(+), 153 deletions(-)
create mode 100644 man/assignCoupleIDs.Rd
diff --git a/man/assignCoupleIDs.Rd b/man/assignCoupleIDs.Rd
new file mode 100644
index 00000000..95165581
--- /dev/null
+++ b/man/assignCoupleIDs.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helpPedigree.R
+\name{assignCoupleIDs}
+\alias{assignCoupleIDs}
+\alias{assignCoupleIds}
+\title{Assign Couple IDs}
+\usage{
+assignCoupleIDs(df_Ngen)
+
+assignCoupleIds(df_Ngen)
+}
+\arguments{
+\item{df_Ngen}{The dataframe for the current generation, including columns for individual IDs and spouse IDs.}
+}
+\value{
+The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier.
+}
+\description{
+This subfunction assigns a unique couple ID to each mated pair in the generation.
+Unmated individuals are assigned NA for their couple ID.
+}
diff --git a/vignettes/partial.Rmd b/vignettes/partial.Rmd
index 45fb7ce6..52a9e45e 100644
--- a/vignettes/partial.Rmd
+++ b/vignettes/partial.Rmd
@@ -49,11 +49,13 @@ We compute the additive genetic relationship matrix using both the classic and p
ped_add_partial_complete <- ped2com(df,
isChild_method = "partialparent",
component = "additive",
- adjacency_method = "direct"
+ adjacency_method = "direct",
+ sparse = FALSE
)
ped_add_classic_complete <- ped2com(df,
isChild_method = "classic",
- component = "additive", adjacency_method = "direct"
+ component = "additive", adjacency_method = "direct",
+ sparse = FALSE
)
```
@@ -67,23 +69,29 @@ library(corrplot)
corrplot(as.matrix(ped_add_classic_complete),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Additive component - Classic method"
-)
+ is.corr = FALSE, title = "Additive component - Classic method",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(ped_add_partial_complete),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Additive component - Partial parent method"
-)
+ is.corr = FALSE, title = "Additive component - Partial parent method",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
To verify this, we subtract one matrix from the other and calculate RMSE. The difference should be numerically zero. Indeed, it is `r sqrt(mean((ped_add_classic_complete-ped_add_partial_complete)^2))`.
-```{r}
-corrplot(as.matrix(ped_add_classic_complete - ped_add_partial_complete),
+```{r,warning=FALSE}
+corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
+ is.corr = FALSE, order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
@@ -101,12 +109,14 @@ df$momID[df$ID == 4] <- NA
ped_add_partial_mom <- ped_add_partial <- ped2com(df,
isChild_method = "partialparent",
component = "additive",
- adjacency_method = "direct"
+ adjacency_method = "direct",
+ sparse = FALSE
)
ped_add_classic_mom <- ped_add_classic <- ped2com(df,
isChild_method = "classic",
- component = "additive", adjacency_method = "direct"
+ component = "additive", adjacency_method = "direct",
+ sparse = FALSE
)
```
@@ -119,30 +129,35 @@ The resulting additive matrices reflect this difference. The RMSE between the tw
```{r}
corrplot(as.matrix(ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic (mother removed)"
-)
-
+ is.corr = FALSE, title = "Classic (mother removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(ped_add_partial),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial (mother removed)"
-)
+ is.corr = FALSE, title = "Partial (mother removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
We quantify the overall matrix difference:
```{r}
-sqrt(mean((ped_add_classic - ped_add_partial)^2))
+sqrt(mean((as.matrix(ped_add_classic) - as.matrix(ped_add_partial))^2))
```
Next, we compare each method to the matrix from the complete pedigree. This evaluates how much each method deviates from the correct additive structure.
```{r}
-corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
+corrplot(as.matrix(ped_add_classic_complete) - as.matrix(ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
```
@@ -152,8 +167,10 @@ The RMSE between the true additive component and the classic method is `r sqrt(m
```{r}
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
```
@@ -190,18 +207,22 @@ ped_add_classic_dad <- ped_add_classic <- ped2com(df,
```
-As we can see, the two matrices are different. The RMSE between the two matrices is `r sqrt(mean((ped_add_classic-ped_add_partial)^2))`.
+As we can see, the two matrices are different. The RMSE between the two matrices is `r sqrt(mean((as.matrix(ped_add_classic)-as.matrix(ped_add_partial))^2))`.
```{r}
corrplot(as.matrix(ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic (father removed)"
-)
+ is.corr = FALSE, title = "Classic (father removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial (father removed)"
-)
+ is.corr = FALSE, title = "Partial (father removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
Again, we compare to the true matrix from the complete pedigree:
@@ -210,8 +231,10 @@ Again, we compare to the true matrix from the complete pedigree:
```{r}
corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
```
@@ -220,8 +243,10 @@ sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
```{r}
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
```
@@ -362,28 +387,38 @@ fam1 <- inbreeding_list[[1]]
corrplot(as.matrix(fam1$ped_add_classic_complete),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic - Complete"
-)
+ is.corr = FALSE, title = "Classic - Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_classic_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic - Mom Missing"
-)
+ is.corr = FALSE, title = "Classic - Mom Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_partial_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial - Mom Missing"
-)
+ is.corr = FALSE, title = "Partial - Mom Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic - Dad Missing"
-)
+ is.corr = FALSE, title = "Classic - Dad Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial - Dad Missing"
-)
+ is.corr = FALSE, title = "Partial - Dad Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
@@ -392,23 +427,31 @@ To visualize the differences from the true matrix:
```{r}
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic Mom Diff from Complete"
-)
+ is.corr = FALSE, title = "Classic Mom Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial Mom Diff from Complete"
-)
+ is.corr = FALSE, title = "Partial Mom Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic Dad Diff from Complete"
-)
+ is.corr = FALSE, title = "Classic Dad Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial Dad Diff from Complete"
-)
+ is.corr = FALSE, title = "Partial Dad Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
```
These plots show how each method responds to missing data, and whether it maintains consistency with the complete pedigree. We observe that the partial parent method typically introduces smaller deviations. If desired, this same diagnostic can be repeated for additional families, such as inbreeding_list[[2]].
diff --git a/vignettes/partial.html b/vignettes/partial.html
index aaa1fbec..9a2e5535 100644
--- a/vignettes/partial.html
+++ b/vignettes/partial.html
@@ -377,12 +377,14 @@ Hazard Data Example
ped_add_partial_complete <- ped2com(df,
isChild_method = "partialparent",
component = "additive",
- adjacency_method = "direct"
-)
-ped_add_classic_complete <- ped2com(df,
- isChild_method = "classic",
- component = "additive", adjacency_method = "direct"
-)
+ adjacency_method = "direct",
+ sparse = FALSE
+)
+ped_add_classic_complete <- ped2com(df,
+ isChild_method = "classic",
+ component = "additive", adjacency_method = "direct",
+ sparse = FALSE
+)
The following plots display the full additive matrices. These
matrices should be identical.
This can be confirmed visually and numerically.
@@ -392,25 +394,28 @@ Hazard Data Example
corrplot(as.matrix(ped_add_classic_complete),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Additive component - Classic method"
-)
-
+ is.corr = FALSE, title = "Additive component - Classic method",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
-corrplot(as.matrix(ped_add_partial_complete),
- method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Additive component - Partial parent method"
-)
-
+
+corrplot(as.matrix(ped_add_partial_complete),
+ method = "color", type = "lower", col.lim = c(0, 1),
+ is.corr = FALSE, title = "Additive component - Partial parent method",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
To verify this, we subtract one matrix from the other and calculate
RMSE. The difference should be numerically zero. Indeed, it is 0.
-corrplot(as.matrix(ped_add_classic_complete - ped_add_partial_complete),
+corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
-#> Warning in corrplot(as.matrix(ped_add_classic_complete -
-#> ped_add_partial_complete), : col.lim interval too wide, please set a suitable
-#> value
-
+ is.corr = FALSE, order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
Introducing Missingness: Remove a Parent
@@ -420,13 +425,15 @@
Introducing Missingness: Remove a Parent
ped_add_partial_mom <- ped_add_partial <- ped2com(df,
isChild_method = "partialparent",
component = "additive",
- adjacency_method = "direct"
-)
-
-ped_add_classic_mom <- ped_add_classic <- ped2com(df,
- isChild_method = "classic",
- component = "additive", adjacency_method = "direct"
-)
+
adjacency_method = "direct",
+
sparse = FALSE
+
)
+
+
ped_add_classic_mom <- ped_add_classic <- ped2com(df,
+
isChild_method = "classic",
+
component = "additive", adjacency_method = "direct",
+
sparse = FALSE
+
)
The two methods now treat individual 4 differently in the parent
adjacency matrix. The classic method applies a fixed contribution
because one parent remains. The partial parent method inflates the
@@ -436,26 +443,31 @@
Introducing Missingness: Remove a Parent
between the two matrices is 0.009811.
corrplot(as.matrix(ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic (mother removed)"
-)
-
-
-corrplot(as.matrix(ped_add_partial),
- method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial (mother removed)"
-)
-
+ is.corr = FALSE, title = "Classic (mother removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
+corrplot(as.matrix(ped_add_partial),
+ method = "color", type = "lower", col.lim = c(0, 1),
+ is.corr = FALSE, title = "Partial (mother removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
We quantify the overall matrix difference:
-sqrt(mean((ped_add_classic - ped_add_partial)^2))
+sqrt(mean((as.matrix(ped_add_classic) - as.matrix(ped_add_partial))^2))
#> [1] 0.009811047
Next, we compare each method to the matrix from the complete
pedigree. This evaluates how much each method deviates from the correct
additive structure.
-corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
+corrplot(as.matrix(ped_add_classic_complete) - as.matrix(ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
-
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
#> [1] 0.02991371
@@ -463,9 +475,11 @@ Introducing Missingness: Remove a Parent
is 0.0299137.
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
-
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
#> [1] 0.02825904
@@ -499,32 +513,44 @@ Removing the Father Instead
two matrices is 0.009811.
corrplot(as.matrix(ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic (father removed)"
-)
-
+ is.corr = FALSE, title = "Classic (father removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial (father removed)"
-)
-
+ is.corr = FALSE, title = "Partial (father removed)",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
Again, we compare to the true matrix from the complete pedigree:
corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
-
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
-#> [1] 0.02991371
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic)^2):
+#> argument is not numeric or logical: returning NA
+#> [1] NA
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE
-)
-
+ is.corr = FALSE,
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
-#> [1] 0.02825904
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial)^2):
+#> argument is not numeric or logical: returning NA
+#> [1] NA
The partial parent method again yields a matrix closer to the
full-data version.
@@ -638,7 +664,85 @@ Inbreeding Dataset: Family-Level Evaluation
ped_add_partial_mom = ped_add_partial_mom,
ped_add_classic_mom = ped_add_classic_mom
)
-}
+}
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2):
+#> argument is not numeric or logical: returning NA
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2):
+#> argument is not numeric or logical: returning NA
Example: Family 1
To understand what these matrices look like, we visualize them for
@@ -651,57 +755,75 @@
Example: Family 1
corrplot(as.matrix(fam1$ped_add_classic_complete),
method = "color", type = "lower", col.lim = c(0, 1),
-
is.corr = FALSE, title = "Classic - Complete"
-
)
-
+ is.corr = FALSE, title = "Classic - Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_classic_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic - Mom Missing"
-)
-
+ is.corr = FALSE, title = "Classic - Mom Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_partial_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial - Mom Missing"
-)
-
+ is.corr = FALSE, title = "Partial - Mom Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic - Dad Missing"
-)
-
+ is.corr = FALSE, title = "Classic - Dad Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial - Dad Missing"
-)
-
+ is.corr = FALSE, title = "Partial - Dad Missing",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
To visualize the differences from the true matrix:
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic Mom Diff from Complete"
-)
-
+ is.corr = FALSE, title = "Classic Mom Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial Mom Diff from Complete"
-)
-
+ is.corr = FALSE, title = "Partial Mom Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Classic Dad Diff from Complete"
-)
-
+ is.corr = FALSE, title = "Classic Dad Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad),
method = "color", type = "lower", col.lim = c(0, 1),
- is.corr = FALSE, title = "Partial Dad Diff from Complete"
-)
-
+ is.corr = FALSE, title = "Partial Dad Diff from Complete",
+ order = "hclust",
+ tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+ col = COL1('Reds', 100), mar=c(0,0,2,0))
+
These plots show how each method responds to missing data, and
whether it maintains consistency with the complete pedigree. We observe
that the partial parent method typically introduces smaller deviations.
@@ -728,13 +850,14 @@
Summary
results %>%
select(RMSE_diff_mom, RMSE_diff_dad) %>%
summary()
-#> RMSE_diff_mom RMSE_diff_dad
-#> Min. :0.001222 Min. :0.001222
-#> 1st Qu.:0.001869 1st Qu.:0.002036
-#> Median :0.002538 Median :0.002520
-#> Mean :0.005763 Mean :0.005786
-#> 3rd Qu.:0.005625 3rd Qu.:0.005625
-#> Max. :0.024221 Max. :0.024221
+#> RMSE_diff_mom RMSE_diff_dad
+#> Min. : NA Min. : NA
+#> 1st Qu.: NA 1st Qu.: NA
+#> Median : NA Median : NA
+#> Mean :NaN Mean :NaN
+#> 3rd Qu.: NA 3rd Qu.: NA
+#> Max. : NA Max. : NA
+#> NA's :8 NA's :8
In all families, both RMSE_diff_mom and
RMSE_diff_dad are positive—indicating that the classic
method produces larger the errors relative to the partial method. This
@@ -742,9 +865,9 @@
Summary
father.
To verify this directly:
mean(results$RMSE_diff_mom > 0, na.rm = TRUE)
-#> [1] 1
+#> [1] NaN
mean(results$RMSE_diff_dad > 0, na.rm = TRUE)
-#> [1] 1
+#> [1] NaN
These proportions show how often the partial method produces a lower
RMSE across the dataset. This confirms the earlier findings: when
pedigree data are incomplete, the partial parent method more faithfully
@@ -756,13 +879,14 @@
Summary
-max_R_partial_dad, -max_R_classic_mom, -max_R_partial_mom, -max_R_classic
) %>%
summary()
-#> RMSE_partial_dad RMSE_partial_mom RMSE_classic_dad RMSE_classic_mom
-#> Min. :0.04773 Min. :0.04773 Min. :0.04895 Min. :0.04895
-#> 1st Qu.:0.05570 1st Qu.:0.05349 1st Qu.:0.05774 1st Qu.:0.05555
-#> Median :0.06206 Median :0.06899 Median :0.06457 Median :0.07158
-#> Mean :0.07545 Mean :0.07686 Mean :0.08124 Mean :0.08262
-#> 3rd Qu.:0.08237 3rd Qu.:0.08323 3rd Qu.:0.08866 3rd Qu.:0.08866
-#> Max. :0.15547 Max. :0.15547 Max. :0.17969 Max. :0.17969
+#> RMSE_partial_dad RMSE_partial_mom RMSE_classic_dad RMSE_classic_mom
+#> Min. : NA Min. : NA Min. : NA Min. : NA
+#> 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
+#> Median : NA Median : NA Median : NA Median : NA
+#> Mean :NaN Mean :NaN Mean :NaN Mean :NaN
+#> 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
+#> Max. : NA Max. : NA Max. : NA Max. : NA
+#> NA's :8 NA's :8 NA's :8 NA's :8
From c7320ebcf519d0b870d3a3fbbee3013f6bb07956 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Sun, 13 Apr 2025 15:05:39 -0400
Subject: [PATCH 21/35] more tests
---
R/readGedcom.R | 7 +++++
R/summarizePedigree.R | 22 +++++++++++++--
tests/testthat/test-summarizePedigrees.R | 34 ++++++++++++++++++++++--
3 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/R/readGedcom.R b/R/readGedcom.R
index f9066c91..c0eea803 100644
--- a/R/readGedcom.R
+++ b/R/readGedcom.R
@@ -587,3 +587,10 @@ collapseNames <- function(verbose, df_temp) {
}
return(df_temp)
}
+#' @rdname readGedcom
+#' @export
+readGed <- readGedcom
+
+#' @rdname readGedcom
+#' @export
+readgedcom <- readGedcom
diff --git a/R/summarizePedigree.R b/R/summarizePedigree.R
index ea8f81e8..6cee4e5e 100644
--- a/R/summarizePedigree.R
+++ b/R/summarizePedigree.R
@@ -235,12 +235,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
)
}
}
-
return(output)
}
-# Function to calculate summary statistics for all numeric variables
+#' Function to calculate summary statistics for all numeric variables
#' This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.
#' @inheritParams summarizePedigrees
#' @param data A data.table containing the pedigree data.
@@ -352,6 +351,9 @@ summarizeMatrilines <- function(ped, famID = "famID", personID = "ID",
)
}
+
+
+
#' Summarize the paternal lines in a pedigree
#' @inheritParams summarizePedigrees
#' @seealso [summarizePedigrees ()]
@@ -432,3 +434,19 @@ findBiggest <- function(foo_summary_dt, nbiggest, n_foo) {
)])
return(biggest_foo)
}
+
+#' @rdname summarizePedigrees
+#' @export
+summarisePedigrees <- summarizePedigrees
+
+#' @rdname summarizeFamilies
+#' @export
+summariseFamilies <- summarizeFamilies
+
+#' @rdname summarizeMatrilines
+#' @export
+summariseMatrilines <- summarizeMatrilines
+
+#' @rdname summarizePatrilines
+#' @export
+summarisePatrilines <- summarizePatrilines
diff --git a/tests/testthat/test-summarizePedigrees.R b/tests/testthat/test-summarizePedigrees.R
index 5bc1d6ea..3fd62464 100644
--- a/tests/testthat/test-summarizePedigrees.R
+++ b/tests/testthat/test-summarizePedigrees.R
@@ -7,7 +7,13 @@ test_that("Counts the correct number people", {
expect_equal(result_observed, result_expected)
})
-
+# Test: SummarizeFamilies is used when SummariseFamilies
+test_that("SummarizeFamilies works like SummariseFamilies", {
+ df <- ped2fam(potter, famID = "newFamID", personID = "personID")
+ df_summarized <- summarizeFamilies(df, famID = "newFamID", personID = "personID")
+ df_summarised <- summariseFamilies(df, famID = "newFamID", personID = "personID")
+ expect_equal(df_summarised, df_summarized)
+})
# Test Case 2: Multiple families
test_that("summarizeFamilies() works with multiple families", {
df <- ped2fam(inbreeding, famID = "newFamID", personID = "ID")
@@ -72,7 +78,13 @@ test_that("summarizeMatrilines() works", {
result_observed <- nrow(df_summarized$biggest_maternal)
expect_equal(result_observed, nbiggest)
})
-
+# Test: SummarizeMatrilines is used when SummariseMatrilines
+test_that("SummarizeMatrilines works like SummariseMatrilines", {
+ df <- ped2fam(potter, famID = "newFamID", personID = "personID")
+ df_summarized <- summarizeMatrilines(df, famID = "newFamID", personID = "personID")
+ df_summarised <- summariseMatrilines(df, famID = "newFamID", personID = "personID")
+ expect_equal(df_summarised, df_summarized)
+})
# Test Case 5: Does this function work for summarizePatrilines
test_that("summarizePatrilines() works", {
nbiggest <- 4
@@ -98,6 +110,13 @@ test_that("summarizePatrilines() works", {
expect_equal(result_observed, nbiggest)
})
+# Test: summarizePatrilines is used when SummarisePatrilines
+test_that("summarizePatrilines works like SummarisePatrilines", {
+ df <- ped2fam(potter, famID = "newFamID", personID = "personID")
+ df_summarized <- summarizePatrilines(df, famID = "newFamID", personID = "personID")
+ df_summarised <- summarisePatrilines(df, famID = "newFamID", personID = "personID")
+ expect_equal(df_summarised, df_summarized)
+})
# Test Case 6: Handling of missing values in critical columns
test_that("summarizePedigrees() handles missing values correctly", {
df <- data.frame(
@@ -137,6 +156,9 @@ test_that("summarizePedigrees() throws error on invalid column names", {
expect_error(summarizePedigrees(df, byr = "unknown_column"))
})
+
+
+
# Test Case 9: Handling empty dataset
# test_that("summarizePedigrees() handles empty dataset gracefully", {
# df <- data.frame(ID = integer(), momID = integer(), dadID = integer(), famID = integer())
@@ -155,3 +177,11 @@ test_that("summarizePedigrees() works for single-entry pedigree", {
expect_equal(nrow(df_summarized$family_summary), 1)
expect_equal(df_summarized$oldest_families$byr_mean, 1920)
})
+
+# Test: summarizePedigrees is used when SummarisePedigrees
+test_that("SummarizePedigrees works like SummarisePedigrees", {
+ df <- ped2fam(potter, famID = "newFamID", personID = "personID")
+ df_summarized <- summarizePedigrees(df, famID = "newFamID", personID = "personID")
+ df_summarised <- summarisePedigrees(df, famID = "newFamID", personID = "personID")
+ expect_equal(df_summarised, df_summarized)
+})
From f45712fa7d116e6e37e60290d46a375d09ba9b7f Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Sun, 13 Apr 2025 15:07:10 -0400
Subject: [PATCH 22/35] aliases
---
NAMESPACE | 6 ++++++
man/calculateSummaryDT.Rd | 4 +++-
man/readGedcom.Rd | 24 ++++++++++++++++++++++++
man/summarizeFamilies.Rd | 19 +++++++++++++++++++
man/summarizeMatrilines.Rd | 19 +++++++++++++++++++
man/summarizePatrilines.Rd | 19 +++++++++++++++++++
man/summarizePedigrees.Rd | 21 +++++++++++++++++++++
7 files changed, 111 insertions(+), 1 deletion(-)
diff --git a/NAMESPACE b/NAMESPACE
index 972282d4..1f9bacee 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,8 +33,10 @@ export(ped2maternal)
export(ped2mit)
export(ped2paternal)
export(plotPedigree)
+export(readGed)
export(readGedcom)
export(readWikifamilytree)
+export(readgedcom)
export(recodeSex)
export(related_coef)
export(relatedness)
@@ -42,6 +44,10 @@ export(repairSex)
export(resample)
export(simulatePedigree)
export(sizeAllGens)
+export(summariseFamilies)
+export(summariseMatrilines)
+export(summarisePatrilines)
+export(summarisePedigrees)
export(summarizeFamilies)
export(summarizeMatrilines)
export(summarizePatrilines)
diff --git a/man/calculateSummaryDT.Rd b/man/calculateSummaryDT.Rd
index 45e67d3f..cb403b04 100644
--- a/man/calculateSummaryDT.Rd
+++ b/man/calculateSummaryDT.Rd
@@ -2,7 +2,8 @@
% Please edit documentation in R/summarizePedigree.R
\name{calculateSummaryDT}
\alias{calculateSummaryDT}
-\title{This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.}
+\title{Function to calculate summary statistics for all numeric variables
+This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.}
\usage{
calculateSummaryDT(data, group_var, skip_var, five_num_summary = FALSE)
}
@@ -20,6 +21,7 @@ the minimum, median, and maximum values.}
A data.table containing the summary statistics for all numeric variables.
}
\description{
+Function to calculate summary statistics for all numeric variables
This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.
}
\keyword{internal}
diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd
index fdb158e1..7bab49b1 100644
--- a/man/readGedcom.Rd
+++ b/man/readGedcom.Rd
@@ -2,6 +2,8 @@
% Please edit documentation in R/readGedcom.R
\name{readGedcom}
\alias{readGedcom}
+\alias{readGed}
+\alias{readgedcom}
\title{Read a GEDCOM File}
\usage{
readGedcom(
@@ -14,6 +16,28 @@ readGedcom(
update_rate = 1000,
...
)
+
+readGed(
+ file_path,
+ verbose = FALSE,
+ add_parents = TRUE,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ skinny = FALSE,
+ update_rate = 1000,
+ ...
+)
+
+readgedcom(
+ file_path,
+ verbose = FALSE,
+ add_parents = TRUE,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ skinny = FALSE,
+ update_rate = 1000,
+ ...
+)
}
\arguments{
\item{file_path}{The path to the GEDCOM file.}
diff --git a/man/summarizeFamilies.Rd b/man/summarizeFamilies.Rd
index 8903eb93..2cb50761 100644
--- a/man/summarizeFamilies.Rd
+++ b/man/summarizeFamilies.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/summarizePedigree.R
\name{summarizeFamilies}
\alias{summarizeFamilies}
+\alias{summariseFamilies}
\title{Summarize the families in a pedigree}
\usage{
summarizeFamilies(
@@ -21,6 +22,24 @@ summarizeFamilies(
five_num_summary = FALSE,
verbose = FALSE
)
+
+summariseFamilies(
+ ped,
+ famID = "famID",
+ personID = "ID",
+ momID = "momID",
+ dadID = "dadID",
+ matID = "matID",
+ patID = "patID",
+ byr = NULL,
+ founder_sort_var = NULL,
+ include_founder = FALSE,
+ nbiggest = 5,
+ noldest = 5,
+ skip_var = NULL,
+ five_num_summary = FALSE,
+ verbose = FALSE
+)
}
\arguments{
\item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns}
diff --git a/man/summarizeMatrilines.Rd b/man/summarizeMatrilines.Rd
index 2890b622..577204f6 100644
--- a/man/summarizeMatrilines.Rd
+++ b/man/summarizeMatrilines.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/summarizePedigree.R
\name{summarizeMatrilines}
\alias{summarizeMatrilines}
+\alias{summariseMatrilines}
\title{Summarize the maternal lines in a pedigree}
\usage{
summarizeMatrilines(
@@ -21,6 +22,24 @@ summarizeMatrilines(
five_num_summary = FALSE,
verbose = FALSE
)
+
+summariseMatrilines(
+ ped,
+ famID = "famID",
+ personID = "ID",
+ momID = "momID",
+ dadID = "dadID",
+ matID = "matID",
+ patID = "patID",
+ byr = NULL,
+ include_founder = FALSE,
+ founder_sort_var = NULL,
+ nbiggest = 5,
+ noldest = 5,
+ skip_var = NULL,
+ five_num_summary = FALSE,
+ verbose = FALSE
+)
}
\arguments{
\item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns}
diff --git a/man/summarizePatrilines.Rd b/man/summarizePatrilines.Rd
index aed89bcd..27fd9494 100644
--- a/man/summarizePatrilines.Rd
+++ b/man/summarizePatrilines.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/summarizePedigree.R
\name{summarizePatrilines}
\alias{summarizePatrilines}
+\alias{summarisePatrilines}
\title{Summarize the paternal lines in a pedigree}
\usage{
summarizePatrilines(
@@ -21,6 +22,24 @@ summarizePatrilines(
five_num_summary = FALSE,
verbose = FALSE
)
+
+summarisePatrilines(
+ ped,
+ famID = "famID",
+ personID = "ID",
+ momID = "momID",
+ dadID = "dadID",
+ matID = "matID",
+ patID = "patID",
+ byr = NULL,
+ founder_sort_var = NULL,
+ include_founder = FALSE,
+ nbiggest = 5,
+ noldest = 5,
+ skip_var = NULL,
+ five_num_summary = FALSE,
+ verbose = FALSE
+)
}
\arguments{
\item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns}
diff --git a/man/summarizePedigrees.Rd b/man/summarizePedigrees.Rd
index 3ed2f0a5..a4f6a6e9 100644
--- a/man/summarizePedigrees.Rd
+++ b/man/summarizePedigrees.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/summarizePedigree.R
\name{summarizePedigrees}
\alias{summarizePedigrees}
+\alias{summarisePedigrees}
\title{Summarize Pedigree Data}
\usage{
summarizePedigrees(
@@ -23,6 +24,26 @@ summarizePedigrees(
network_checks = FALSE,
verbose = FALSE
)
+
+summarisePedigrees(
+ ped,
+ famID = "famID",
+ personID = "ID",
+ momID = "momID",
+ dadID = "dadID",
+ matID = "matID",
+ patID = "patID",
+ type = c("fathers", "mothers", "families"),
+ byr = NULL,
+ include_founder = FALSE,
+ founder_sort_var = NULL,
+ nbiggest = 5,
+ noldest = nbiggest,
+ skip_var = NULL,
+ five_num_summary = FALSE,
+ network_checks = FALSE,
+ verbose = FALSE
+)
}
\arguments{
\item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns}
From d023b785d81f99e969e5e94176c6cf2403605a94 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 14:07:33 -0400
Subject: [PATCH 23/35] default now is direct method
---
R/convertPedigree.R | 16 +++++++++++++---
tests/testthat/test-convertPedigree.R | 6 +++---
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/R/convertPedigree.R b/R/convertPedigree.R
index 9cab93f0..3d5ac7ca 100644
--- a/R/convertPedigree.R
+++ b/R/convertPedigree.R
@@ -31,7 +31,7 @@ ped2com <- function(ped, component,
flatten.diag = FALSE,
standardize.colnames = TRUE,
transpose_method = "tcrossprod",
- adjacency_method = "indexed",
+ adjacency_method = "direct",
isChild_method = "classic",
saveable = FALSE,
resume = FALSE,
@@ -615,8 +615,18 @@ ped2ce <- function(ped,
iss <- c(mIDs$rID, dIDs$rID)
jss <- c(mIDs$cID, dIDs$cID)
} else if (component %in% c("common nuclear")) {
- stop("Common Nuclear component is not yet implemented for direct method. Use index method.\n")
+ message("Common Nuclear component is not yet implemented for direct method. Using index method.\n")
# change to warning and call indexed version
+ list_of_adjacency <- .adjIndexed(ped = ped, component = component,
+ saveable = saveable, resume = resume,
+ save_path = save_path, verbose = verbose,
+ lastComputed = lastComputed, nr = nr,
+ checkpoint_files = checkpoint_files,
+ update_rate = update_rate, parList = parList,
+ lens = lens, save_rate_parlist = save_rate_parlist,
+ ...
+ )
+ return(list_of_adjacency)
} else if (component %in% c("mitochondrial")) {
mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID))
iss <- c(mIDs$rID)
@@ -641,7 +651,7 @@ ped2ce <- function(ped,
#' @param checkpoint_files a list of checkpoint files
compute_parent_adjacency <- function(ped, component,
- adjacency_method = "indexed",
+ adjacency_method = "direct",
saveable, resume,
save_path, verbose,
lastComputed, nr, checkpoint_files, update_rate,
diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R
index 11dfbeca..ec5af8b1 100644
--- a/tests/testthat/test-convertPedigree.R
+++ b/tests/testthat/test-convertPedigree.R
@@ -301,10 +301,10 @@ test_that("adjacency_method 'indexed', 'loop', and direct produce the same resu
# common nuclear
ped_common_indexed <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed")
ped_common_loop <- ped2com(hazard, component = "common nuclear", adjacency_method = "loop")
- # ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct")
+ ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct")
expect_equal(ped_common_indexed, ped_common_loop, tolerance = tolerance)
- # expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance)
- # expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance)
+ expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance)
+ expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance)
})
From b55ae50c22a2272ee71b2cfaaa8d80e0b5c6c579 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 17:13:48 -0400
Subject: [PATCH 24/35] Update readGedcom.R
subfactor
---
NAMESPACE | 1 +
R/helpPedigree.R | 11 +-
R/readGedcom.R | 69 ++-
R/readGedcom_alpha.R | 616 ++++++++++++++++++++++
man/compute_parent_adjacency.Rd | 2 +-
man/determineSex.Rd | 2 +-
man/ped2com.Rd | 2 +-
man/postProcessGedcom.Rd | 34 ++
man/readGedcom.Rd | 3 +
tests/testthat/test-readPedigrees_alpha.R | 231 ++++++++
10 files changed, 946 insertions(+), 25 deletions(-)
create mode 100644 R/readGedcom_alpha.R
create mode 100644 man/postProcessGedcom.Rd
create mode 100644 tests/testthat/test-readPedigrees_alpha.R
diff --git a/NAMESPACE b/NAMESPACE
index 1f9bacee..69e23307 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
export(SimPed)
export(allGens)
+export(assignCoupleIDs)
export(calcAllGens)
export(calcFamilySize)
export(calculateRelatedness)
diff --git a/R/helpPedigree.R b/R/helpPedigree.R
index 3d054d4b..f9e7c855 100644
--- a/R/helpPedigree.R
+++ b/R/helpPedigree.R
@@ -41,13 +41,13 @@ createGenDataFrame <- function(sizeGens, genIndex, idGen) {
#' @param sexR Numeric value indicating the sex ratio (proportion of males).
#' @return Vector of sexes ("M" for male, "F" for female) for the offspring.
#' @importFrom stats runif
-determineSex <- function(idGen, sexR) {
+determineSex <- function(idGen, sexR, code_male = "M", code_female = "F") {
if (runif(1) > .5) {
- sexVec1 <- rep("M", floor(length(idGen) * sexR))
- sexVec2 <- rep("F", length(idGen) - length(sexVec1))
+ sexVec1 <- rep(code_male, floor(length(idGen) * sexR))
+ sexVec2 <- rep(code_female, length(idGen) - length(sexVec1))
} else {
- sexVec1 <- rep("F", floor(length(idGen) * (1 - sexR)))
- sexVec2 <- rep("M", length(idGen) - length(sexVec1))
+ sexVec1 <- rep(code_female, floor(length(idGen) * (1 - sexR)))
+ sexVec2 <- rep(code_male, length(idGen) - length(sexVec1))
}
sexVec <- sample(c(sexVec1, sexVec2))
return(sexVec)
@@ -60,6 +60,7 @@ determineSex <- function(idGen, sexR) {
#'
#' @param df_Ngen The dataframe for the current generation, including columns for individual IDs and spouse IDs.
#' @return The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier.
+#' @export
assignCoupleIDs <- function(df_Ngen) {
df_Ngen$coupleId <- NA_character_ # Initialize the coupleId column with NAs
usedCoupleIds <- character() # Initialize an empty character vector to track used IDs
diff --git a/R/readGedcom.R b/R/readGedcom.R
index c0eea803..f271632c 100644
--- a/R/readGedcom.R
+++ b/R/readGedcom.R
@@ -55,6 +55,7 @@ readGedcom <- function(file_path,
combine_cols = TRUE,
skinny = FALSE,
update_rate = 1000,
+ post_process = TRUE,
...) {
# Checks
if (!file.exists(file_path)) stop("File does not exist: ", file_path)
@@ -300,6 +301,39 @@ readGedcom <- function(file_path,
if (nrow(df_temp) != num_rows$num_indi_rows) {
warning("The number of people found in the processed file does not match the number of individuals raw data")
}
+
+ if(post_process){
+ if (verbose) {
+ print("Post-processing data frame")
+ }
+ # Remove the first row (empty)
+df_temp <- postProcessGedcom(
+ df_temp = df_temp,
+ remove_empty_cols = remove_empty_cols,
+ combine_cols = combine_cols,
+ add_parents = add_parents,
+ skinny = skinny,
+ verbose = verbose
+ )
+
+ }
+
+ return(df_temp)
+}
+
+#' Post-process GEDCOM Data Frame
+#'
+#' @inheritParams readGedcom
+#' @inheritParams mapFAMS2parents
+#' @return A data frame with processed information.
+
+postProcessGedcom <- function(df_temp,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ add_parents = TRUE,
+ skinny = TRUE,
+ verbose = FALSE
+){
# Add mom and dad ids
if (add_parents) {
if (verbose) {
@@ -308,28 +342,29 @@ readGedcom <- function(file_path,
df_temp <- processParents(df_temp, datasource = "gedcom")
}
- if (combine_cols) {
- df_temp <- collapseNames(verbose = verbose, df_temp = df_temp)
- }
+if (combine_cols) {
+ df_temp <- collapseNames(verbose = verbose, df_temp = df_temp)
+}
- if (remove_empty_cols) {
- # Remove empty columns
- if (verbose) {
- print("Removing empty columns")
- }
- df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
+if (remove_empty_cols) {
+ # Remove empty columns
+ if (verbose) {
+ print("Removing empty columns")
}
- if (skinny) {
- if (verbose) {
- print("Slimming down the data frame")
- }
- df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
- df_temp$FAMC <- NULL
- df_temp$FAMS <- NULL
+ df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
+}
+if (skinny) {
+ if (verbose) {
+ print("Slimming down the data frame")
}
- return(df_temp)
+ df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
+ df_temp$FAMC <- NULL
+ df_temp$FAMS <- NULL
}
+return(df_temp)
+
+}
#' Create a mapping of family IDs to parent IDs
#'
diff --git a/R/readGedcom_alpha.R b/R/readGedcom_alpha.R
new file mode 100644
index 00000000..3cde78dc
--- /dev/null
+++ b/R/readGedcom_alpha.R
@@ -0,0 +1,616 @@
+#' Read a GEDCOM File
+#'
+#' This function reads a GEDCOM file and parses it into a structured data frame of individuals.
+#'
+#' @param file_path The path to the GEDCOM file.
+#' @param add_parents A logical value indicating whether to add parents to the data frame.
+#' @param remove_empty_cols A logical value indicating whether to remove columns with all missing values.
+#' @param combine_cols A logical value indicating whether to combine columns with duplicate values.
+#' @param verbose A logical value indicating whether to print messages.
+#' @param skinny A logical value indicating whether to return a skinny data frame.
+#' @param update_rate numeric. The rate at which to print progress
+#' @param ... Additional arguments to be passed to the function.
+#' @return A data frame containing information about individuals, with the following potential columns:
+#' - `id`: ID of the individual
+#' - `momID`: ID of the individual's mother
+#' - `dadID`: ID of the individual's father
+#' - `sex`: Sex of the individual
+#' - `name`: Full name of the individual
+#' - `name_given`: First name of the individual
+#' - `name_surn`: Last name of the individual
+#' - `name_marriedsurn`: Married name of the individual
+#' - `name_nick`: Nickname of the individual
+#' - `name_npfx`: Name prefix
+#' - `name_nsfx`: Name suffix
+#' - `birth_date`: Birth date of the individual
+#' - `birth_lat`: Latitude of the birthplace
+#' - `birth_long`: Longitude of the birthplace
+#' - `birth_place`: Birthplace of the individual
+#' - `death_caus`: Cause of death
+#' - `death_date`: Death date of the individual
+#' - `death_lat`: Latitude of the place of death
+#' - `death_long`: Longitude of the place of death
+#' - `death_place`: Place of death of the individual
+#' - `attribute_caste`: Caste of the individual
+#' - `attribute_children`: Number of children of the individual
+#' - `attribute_description`: Description of the individual
+#' - `attribute_education`: Education of the individual
+#' - `attribute_idnumber`: Identification number of the individual
+#' - `attribute_marriages`: Number of marriages of the individual
+#' - `attribute_nationality`: Nationality of the individual
+#' - `attribute_occupation`: Occupation of the individual
+#' - `attribute_property`: Property owned by the individual
+#' - `attribute_religion`: Religion of the individual
+#' - `attribute_residence`: Residence of the individual
+#' - `attribute_ssn`: Social security number of the individual
+#' - `attribute_title`: Title of the individual
+#' - `FAMC`: ID(s) of the family where the individual is a child
+#' - `FAMS`: ID(s) of the family where the individual is a spouse
+#' @export
+readGedcom.alpha <- function(file_path,
+ verbose = FALSE,
+ add_parents = TRUE,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ skinny = FALSE,
+ update_rate = 1000,
+ post_process = TRUE,
+ ...) {
+
+ # Ensure the file exists and read all lines.
+ if (!file.exists(file_path)) {
+ stop("File does not exist: ", file_path)
+ }
+ if (verbose) message("Reading file: ", file_path)
+ lines <- readLines(file_path)
+ total_lines <- length(lines)
+ if (verbose) message("File is ", total_lines, " lines long")
+
+ # Count pattern occurrences (pattern_rows remains used in subfunctions)
+ pattern_rows <- countPatternRows.alpha(data.frame(X1 = lines))
+
+ # List of variables to initialize
+ all_var_names <- unlist(list(
+ identifiers = c("id", "momID", "dadID"),
+ names = c("name", "name_given", "name_given_pieces", "name_surn", "name_surn_pieces", "name_marriedsurn",
+ "name_nick", "name_npfx", "name_nsfx"),
+ sex = c("sex"),
+ birth = c("birth_date", "birth_lat", "birth_long", "birth_place"),
+ death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"),
+ attributes = c("attribute_caste", "attribute_children", "attribute_description", "attribute_education",
+ "attribute_idnumber", "attribute_marriages", "attribute_nationality", "attribute_occupation",
+ "attribute_property", "attribute_religion", "attribute_residence", "attribute_ssn",
+ "attribute_title"),
+ relationships = c("FAMC", "FAMS")
+ ), use.names = FALSE)
+
+ # Split the file into blocks; each block corresponds to one individual.
+ blocks <- splitIndividuals.alpha(lines, verbose)
+
+ # Parse each individual block into a record (a named list)
+ records <- lapply(blocks, parseIndividualBlock.alpha,
+ pattern_rows = pattern_rows,
+ all_var_names = all_var_names, verbose = verbose)
+
+ # Remove any NULLs (if a block did not contain an individual id)
+ records <- Filter(Negate(is.null), records)
+
+ if (length(records) == 0) {
+ warning("No people found in file")
+ return(NULL)
+ }
+
+ # Convert the list of records to a data frame.
+ df_temp <- do.call(rbind, lapply(records, function(rec) {
+ as.data.frame(rec, stringsAsFactors = FALSE)
+ }))
+
+ if (verbose) message("File has ", nrow(df_temp), " people")
+
+ # Run post-processing if requested.
+ if (post_process) {
+ if (verbose) message("Post-processing data frame")
+ df_temp <- postProcessGedcom.alpha(
+ df_temp = df_temp,
+ remove_empty_cols = remove_empty_cols,
+ combine_cols = combine_cols,
+ add_parents = add_parents,
+ skinny = skinny,
+ verbose = verbose
+ )
+ }
+
+ return(df_temp)
+}
+
+# --- SUBFUNCTIONS ---
+#' Split GEDCOM Lines into Individual Blocks
+#'
+#' This function partitions the GEDCOM file (as a vector of lines) into a list of blocks,
+#' where each block corresponds to a single individual starting with an "@ INDI" line.
+#'
+#' @param lines A character vector of lines from the GEDCOM file.
+#' @param verbose Logical indicating whether to output progress messages.
+#' @return A list of character vectors, each representing one individual.
+splitIndividuals.alpha <- function(lines, verbose = FALSE) {
+ indi_idx <- grep("@ INDI", lines)
+ if (length(indi_idx) == 0) return(list())
+
+ blocks <- list()
+ for (i in seq_along(indi_idx)) {
+ start <- indi_idx[i]
+ end <- if (i < length(indi_idx)) indi_idx[i + 1] - 1 else length(lines)
+ block <- lines[start:end]
+ blocks[[length(blocks) + 1]] <- block
+ }
+ if (verbose) message("Found ", length(blocks), " individual blocks")
+ return(blocks)
+}
+
+#' Initialize an Empty Individual Record
+#'
+#' Creates a named list with all GEDCOM fields set to NA.
+#'
+#' @param all_var_names A character vector of variable names.
+#' @return A named list representing an empty individual record.
+initializeRecord.alpha <- function(all_var_names) {
+ setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names)
+}
+
+#' Parse a GEDCOM Individual Block
+#'
+#' Processes a block of GEDCOM lines corresponding to a single individual.
+#'
+#' @param block A character vector containing the GEDCOM lines for one individual.
+#' @param pattern_rows A list with counts of lines matching specific GEDCOM tags.
+#' @param all_var_names A character vector of variable names.
+#' @param verbose Logical indicating whether to print progress messages.
+#' @return A named list representing the parsed record for the individual, or NULL if no ID is found.
+#' @keywords internal
+parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbose = FALSE) {
+ record <- initializeRecord.alpha(all_var_names)
+ n_lines <- length(block)
+
+ # Loop through the block by index so that we can look ahead for event details.
+ i <- 1
+ while (i <= n_lines) {
+ line <- block[i]
+
+ # Process individual identifier (e.g., "@ INDI ...")
+ if (grepl("@ INDI", line)) {
+ record$id <- stringr::str_extract(line, "(?<=@.)\\d*(?=@)")
+ i <- i + 1
+ next
+ }
+
+ # Special processing for full name using " NAME" tag.
+ if (grepl(" NAME", line) && pattern_rows$num_name_rows > 0) {
+ record <- parseNameLine.alpha(line, record)
+ i <- i + 1
+ next
+ }
+
+ # Process birth and death events by consuming multiple lines.
+ if (grepl(" BIRT", line) && pattern_rows$num_birt_rows > 0) {
+ record <- processEventLine.alpha("birth", block, i, record, pattern_rows)
+ i <- i + 1 # Skip further processing of this line.
+ next
+ }
+ if (grepl(" DEAT", line) && pattern_rows$num_deat_rows > 0) {
+ record <- processEventLine.alpha("death", block, i, record, pattern_rows)
+ i <- i + 1
+ next
+ }
+
+ # Process other tags using common mappings.
+ # Define mappings for name pieces (if not handled by NAME tag).
+ name_piece_mappings <- list(
+ list(tag = "GIVN", field = "name_given_pieces", mode = "replace"),
+ list(tag = "NPFX", field = "name_npfx", mode = "replace"),
+ list(tag = "NICK", field = "name_nick", mode = "replace"),
+ list(tag = "SURN", field = "name_surn_pieces", mode = "replace"),
+ list(tag = "NSFX", field = "name_nsfx", mode = "replace"),
+ list(tag = "_MARNM", field = "name_marriedsurn", mode = "replace")
+ )
+ out <- applyTagMappings.alpha(line, record, pattern_rows, name_piece_mappings)
+ if (out$matched) { record <- out$record
+ i <- i + 1
+ next }
+
+ # Process attribute tags.
+ attribute_mappings <- list(
+ list(tag = "SEX", field = "sex", mode = "replace"),
+ list(tag = "CAST", field = "attribute_caste", mode = "replace"),
+ list(tag = "DSCR", field = "attribute_description", mode = "replace"),
+ list(tag = "EDUC", field = "attribute_education", mode = "replace"),
+ list(tag = "IDNO", field = "attribute_idnumber", mode = "replace"),
+ list(tag = "NATI", field = "attribute_nationality", mode = "replace"),
+ list(tag = "NCHI", field = "attribute_children", mode = "replace"),
+ list(tag = "NMR", field = "attribute_marriages", mode = "replace"),
+ list(tag = "OCCU", field = "attribute_occupation", mode = "replace"),
+ list(tag = "PROP", field = "attribute_property", mode = "replace"),
+ list(tag = "RELI", field = "attribute_religion", mode = "replace"),
+ list(tag = "RESI", field = "attribute_residence", mode = "replace"),
+ list(tag = "SSN", field = "attribute_ssn", mode = "replace"),
+ list(tag = "TITL", field = "attribute_title", mode = "replace")
+ )
+ out <- applyTagMappings.alpha(line, record, pattern_rows, attribute_mappings)
+ if (out$matched) { record <- out$record
+ i <- i + 1
+ next }
+
+ # Process relationship tags, using a custom extractor.
+ relationship_mappings <- list(
+ list(tag = "FAMC", field = "FAMC", mode = "append",
+ extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")),
+ list(tag = "FAMS", field = "FAMS", mode = "append",
+ extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"))
+ )
+ out <- applyTagMappings.alpha(line, record, pattern_rows, relationship_mappings)
+ if (out$matched) { record <- out$record
+ i <- i + 1
+ next }
+
+ # Optionally print progress for long records.
+ i <- i + 1
+ }
+
+ # If the record has no ID, return NULL.
+ if (is.na(record$id)) return(NULL)
+ return(record)
+}
+
+#' Parse a Full Name Line
+#'
+#' Extracts full name information from a GEDCOM "NAME" line and updates the record accordingly.
+#'
+#' @param line A character string containing the name line.
+#' @param record A named list representing the individual's record.
+#' @return The updated record with parsed name information.
+parseNameLine.alpha <- function(line, record) {
+ record$name <- extract_info.alpha(line, "NAME")
+ record$name_given <- stringr::str_extract(record$name, ".*(?= /)")
+ record$name_surn <- stringr::str_extract(record$name, "(?<=/).*(?=/)")
+ record$name <- stringr::str_squish(stringr::str_replace(record$name, "/", " "))
+ return(record)
+}
+
+#' Process Event Lines (Birth or Death)
+#'
+#' Extracts event details (e.g., date, place, cause, latitude, longitude) from a block of GEDCOM lines.
+#' For "birth": expect DATE on line i+1, PLAC on i+2, LATI on i+4, LONG on i+5.
+#' For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5.
+#' @param event A character string indicating the event type ("birth" or "death").
+#' @param block A character vector of GEDCOM lines.
+#' @param i The current line index where the event tag is found.
+#' @param record A named list representing the individual's record.
+#' @param pattern_rows A list with counts of GEDCOM tag occurrences.
+#' @return The updated record with parsed event information.#
+# For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5.
+processEventLine.alpha <- function(event, block, i, record, pattern_rows) {
+ n_lines <- length(block)
+ if (event == "birth") {
+ if (i + 1 <= n_lines) record$birth_date <- extract_info.alpha(block[i+1], "DATE")
+ if (i + 2 <= n_lines) record$birth_place <- extract_info.alpha(block[i+2], "PLAC")
+ if (i + 4 <= n_lines) record$birth_lat <- extract_info.alpha(block[i+4], "LATI")
+ if (i + 5 <= n_lines) record$birth_long <- extract_info.alpha(block[i+5], "LONG")
+ } else if (event == "death") {
+ if (i + 1 <= n_lines) record$death_date <- extract_info.alpha(block[i+1], "DATE")
+ if (i + 2 <= n_lines) record$death_place <- extract_info.alpha(block[i+2], "PLAC")
+ if (i + 3 <= n_lines) record$death_caus <- extract_info.alpha(block[i+3], "CAUS")
+ if (i + 4 <= n_lines) record$death_lat <- extract_info.alpha(block[i+4], "LATI")
+ if (i + 5 <= n_lines) record$death_long <- extract_info.alpha(block[i+5], "LONG")
+ }
+ return(record)
+}
+
+#' Apply Tag Mappings to a Line
+#'
+#' Iterates over a list of tag mappings and, if a tag matches the line, updates the record.
+#'
+#' @param line A character string from the GEDCOM file.
+#' @param record A named list representing the individual's record.
+#' @param pattern_rows A list with GEDCOM tag counts.
+#' @param tag_mappings A list of lists. Each sublist should define:
+#' - \code{tag}: the GEDCOM tag,
+#' - \code{field}: the record field to update,
+#' - \code{mode}: either "replace" or "append",
+#' - \code{extractor}: (optional) a custom extraction function.
+#' @return A list with the updated record (\code{record}) and a logical flag (\code{matched}).
+applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) {
+ for (mapping in tag_mappings) {
+ extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor
+ result <- process_tag.alpha(mapping$tag, mapping$field, pattern_rows, line, record,
+ extractor = extractor, mode = mapping$mode)
+ record <- result$vars
+ if (result$matched) {
+ return(list(record = record, matched = TRUE))
+ }
+ }
+ return(list(record = record, matched = FALSE))
+}
+
+
+#' Extract Information from Line
+#'
+#' This function extracts information from a line based on a specified type.
+#' @param line A character string representing a line from a GEDCOM file.
+#' @param type A character string representing the type of information to extract.
+#' @return A character string with the extracted information.
+#' @keywords internal
+extract_info.alpha <- function(line, type) {
+ stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+")))
+}
+
+#' Count GEDCOM Pattern Rows
+#'
+#' Counts the number of lines in a file (passed as a data frame with column "X1")
+#' that match various GEDCOM patterns.
+#'
+#' @param file A data frame with a column \code{X1} containing GEDCOM lines.
+#' @return A list with counts of specific GEDCOM tag occurrences.
+countPatternRows.alpha <- function(file) {
+ pattern_counts <- sapply(
+ c(
+ "@ INDI", " NAME", " GIVN", " NPFX", " NICK", " SURN", " NSFX", " _MARNM",
+ " BIRT", " DEAT", " SEX", " CAST", " DSCR", " EDUC", " IDNO", " NATI",
+ " NCHI", " NMR", " OCCU", " PROP", " RELI", " RESI", " SSN", " TITL",
+ " FAMC", " FAMS", " PLAC", " LATI", " LONG", " DATE", " CAUS"
+ ),
+ function(pat) sum(grepl(pat, file$X1))
+ )
+ num_rows <- list(
+ num_indi_rows = pattern_counts["@ INDI"],
+ num_name_rows = pattern_counts[" NAME"],
+ num_givn_rows = pattern_counts[" GIVN"],
+ num_npfx_rows = pattern_counts[" NPFX"],
+ num_nick_rows = pattern_counts[" NICK"],
+ num_surn_rows = pattern_counts[" SURN"],
+ num_nsfx_rows = pattern_counts[" NSFX"],
+ num_marnm_rows = pattern_counts[" _MARNM"],
+ num_birt_rows = pattern_counts[" BIRT"],
+ num_deat_rows = pattern_counts[" DEAT"],
+ num_sex_rows = pattern_counts[" SEX"],
+ num_cast_rows = pattern_counts[" CAST"],
+ num_dscr_rows = pattern_counts[" DSCR"],
+ num_educ_rows = pattern_counts[" EDUC"],
+ num_idno_rows = pattern_counts[" IDNO"],
+ num_nati_rows = pattern_counts[" NATI"],
+ num_nchi_rows = pattern_counts[" NCHI"],
+ num_nmr_rows = pattern_counts[" NMR"],
+ num_occu_rows = pattern_counts[" OCCU"],
+ num_prop_rows = pattern_counts[" PROP"],
+ num_reli_rows = pattern_counts[" RELI"],
+ num_resi_rows = pattern_counts[" RESI"],
+ num_ssn_rows = pattern_counts[" SSN"],
+ num_titl_rows = pattern_counts[" TITL"],
+ num_famc_rows = pattern_counts[" FAMC"],
+ num_fams_rows = pattern_counts[" FAMS"],
+ num_plac_rows = pattern_counts[" PLAC"],
+ num_lati_rows = pattern_counts[" LATI"],
+ num_long_rows = pattern_counts[" LONG"],
+ num_date_rows = pattern_counts[" DATE"],
+ num_caus_rows = pattern_counts[" CAUS"]
+ )
+ return(num_rows)
+}
+
+#' Process a GEDCOM Tag
+#'
+#' Extracts and assigns a value to a specified field in `vars` if the pattern is present.
+#' Returns both the updated variable list and a flag indicating whether the tag was matched.
+#'
+#' @param tag The GEDCOM tag (e.g., "SEX", "CAST", etc.).
+#' @param field_name The name of the variable to assign to in `vars`.
+#' @param pattern_rows Output from `countPatternRows()`.
+#' @param line The GEDCOM line to parse.
+#' @param vars The current list of variables to update.
+#' @return A list with updated `vars` and a `matched` flag.
+#' @keywords internal
+process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars,
+ extractor = NULL, mode = "replace") {
+ count_name <- paste0("num_", tolower(tag), "_rows")
+ matched <- FALSE
+ if (!is.null(pattern_rows[[count_name]]) &&
+ pattern_rows[[count_name]] > 0 &&
+ grepl(paste0(" ", tag), line)) {
+ value <- if (is.null(extractor)) {
+ extract_info.alpha(line, tag)
+ } else {
+ extractor(line)
+ }
+ if (mode == "append" && !is.na(vars[[field_name]])) {
+ vars[[field_name]] <- paste0(vars[[field_name]], ", ", value)
+ } else {
+ vars[[field_name]] <- value
+ }
+ matched <- TRUE
+ }
+ return(list(vars = vars, matched = matched))
+}
+
+#' Post-process GEDCOM Data Frame
+#'
+#' This function optionally adds parent information, combines duplicate columns,
+#' and removes empty columns from the GEDCOM data frame.
+#'
+#' @param df_temp A data frame produced by \code{readGedcom()}.
+#' @param remove_empty_cols Logical indicating whether to remove columns that are entirely missing.
+#' @param combine_cols Logical indicating whether to combine columns with duplicate values.
+#' @param add_parents Logical indicating whether to add parent information.
+#' @param skinny Logical indicating whether to slim down the data frame.
+#' @param verbose Logical indicating whether to print progress messages.
+#' @return The post-processed data frame.
+postProcessGedcom.alpha <- function(df_temp,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ add_parents = TRUE,
+ skinny = TRUE,
+ verbose = FALSE) {
+ if (add_parents) {
+ if (verbose) message("Processing parents")
+ df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
+ }
+ if (combine_cols) {
+ df_temp <- collapseNames.alpha(verbose = verbose, df_temp = df_temp)
+ }
+ if (remove_empty_cols) {
+ if (verbose) message("Removing empty columns")
+ df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
+ }
+ if (skinny) {
+ if (verbose) message("Slimming down the data frame")
+ df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)]
+ df_temp$FAMC <- NULL
+ df_temp$FAMS <- NULL
+ }
+ return(df_temp)
+}
+
+#' Process Parents Information from GEDCOM Data
+#'
+#' Adds parent IDs to the individuals based on family relationship data.
+#'
+#' @param df_temp A data frame produced by \code{readGedcom()}.
+#' @param datasource Character string indicating the data source ("gedcom" or "wiki").
+#' @return The updated data frame with parent IDs added.
+processParents.alpha <- function(df_temp, datasource) {
+ if (datasource == "gedcom") {
+ required_cols <- c("FAMC", "sex", "FAMS")
+ } else if (datasource == "wiki") {
+ required_cols <- c("id")
+ } else {
+ stop("Invalid datasource")
+ }
+ if (!all(required_cols %in% colnames(df_temp))) {
+ missing_cols <- setdiff(required_cols, colnames(df_temp))
+ warning("Missing necessary columns: ", paste(missing_cols, collapse = ", "))
+ return(df_temp)
+ }
+ family_to_parents <- mapFAMS2parents.alpha(df_temp)
+ if (is.null(family_to_parents) || length(family_to_parents) == 0) {
+ return(df_temp)
+ }
+ df_temp <- mapFAMC2parents.alpha(df_temp, family_to_parents)
+ return(df_temp)
+}
+
+#' Create a Mapping from Family IDs to Parent IDs
+#'
+#' This function scans the data frame and creates a mapping of family IDs
+#' to the corresponding parent IDs.
+#'
+#' @param df_temp A data frame produced by \code{readGedcom()}.
+#' @return A list mapping family IDs to parent information.
+mapFAMS2parents.alpha <- function(df_temp) {
+ if (!all(c("FAMS", "sex") %in% colnames(df_temp))) {
+ warning("The data frame does not contain the necessary columns (FAMS, sex)")
+ return(NULL)
+ }
+ family_to_parents <- list()
+ for (i in 1:nrow(df_temp)) {
+ if (!is.na(df_temp$FAMS[i])) {
+ fams_ids <- unlist(strsplit(df_temp$FAMS[i], ", "))
+ for (fams_id in fams_ids) {
+ if (!is.null(family_to_parents[[fams_id]])) {
+ if (df_temp$sex[i] == "M") {
+ family_to_parents[[fams_id]]$father <- df_temp$id[i]
+ } else if (df_temp$sex[i] == "F") {
+ family_to_parents[[fams_id]]$mother <- df_temp$id[i]
+ }
+ } else {
+ family_to_parents[[fams_id]] <- list()
+ if (df_temp$sex[i] == "M") {
+ family_to_parents[[fams_id]]$father <- df_temp$id[i]
+ } else if (df_temp$sex[i] == "F") {
+ family_to_parents[[fams_id]]$mother <- df_temp$id[i]
+ }
+ }
+ }
+ }
+ }
+ return(family_to_parents)
+}
+
+#' Assign momID and dadID based on family mapping
+#'
+#' This function assigns mother and father IDs to individuals in the data frame
+#' based on the mapping of family IDs to parent IDs.
+#'
+#' @param df_temp A data frame containing individual information.
+#' @param family_to_parents A list mapping family IDs to parent IDs.
+#' @return A data frame with added momID and dad_ID columns.
+#' @keywords internal
+mapFAMC2parents.alpha <- function(df_temp, family_to_parents) {
+ df_temp$momID <- NA_character_
+ df_temp$dadID <- NA_character_
+ for (i in 1:nrow(df_temp)) {
+ if (!is.na(df_temp$FAMC[i])) {
+ famc_ids <- unlist(strsplit(df_temp$FAMC[i], ", "))
+ for (famc_id in famc_ids) {
+ if (!is.null(family_to_parents[[famc_id]])) {
+ if (!is.null(family_to_parents[[famc_id]]$father)) {
+ df_temp$dadID[i] <- family_to_parents[[famc_id]]$father
+ }
+ if (!is.null(family_to_parents[[famc_id]]$mother)) {
+ df_temp$momID[i] <- family_to_parents[[famc_id]]$mother
+ }
+ }
+ }
+ }
+ }
+ return(df_temp)
+}
+
+#' collapse Names
+#'
+#' This function combines the `name_given` and `name_given_pieces` columns in a data frame.
+#'
+#' @inheritParams readGedcom
+#' @param df_temp A data frame containing the columns to be combined.
+#' @return A data frame with the combined columns.
+collapseNames.alpha <- function(verbose, df_temp) {
+ if (verbose) message("Combining Duplicate Columns")
+
+ if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) {
+ result <- combine_columns.alpha(df_temp$name_given, df_temp$name_given_pieces)
+ df_temp$name_given <- result$combined
+ if (!result$retain_col2) df_temp$name_given_pieces <- NULL
+ }
+
+ if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) {
+ result <- combine_columns.alpha(df_temp$name_surn, df_temp$name_surn_pieces)
+ df_temp$name_surn <- result$combined
+ if (!result$retain_col2) df_temp$name_surn_pieces <- NULL
+ }
+ return(df_temp)
+}
+
+#' Combine Columns
+#'
+#' This function combines two columns, handling conflicts and merging non-conflicting data.
+#' @param col1 The first column to combine.
+#' @param col2 The second column to combine.
+#' @return A list with the combined column and a flag indicating if the second column should be retained.
+#' @keywords internal
+# Helper function to check for conflicts and merge columns
+combine_columns.alpha <- function(col1, col2) {
+ col1_lower <- stringr::str_to_lower(col1)
+ col2_lower <- stringr::str_to_lower(col2)
+ conflicts <- !is.na(col1_lower) & !is.na(col2_lower) & col1_lower != col2_lower
+ if (any(conflicts)) {
+ warning("Columns have conflicting values. They were not merged.")
+ return(list(combined = col1, retain_col2 = TRUE))
+ } else {
+ combined <- ifelse(is.na(col1), col2, col1)
+ return(list(combined = combined, retain_col2 = FALSE))
+ }
+}
+
+# --- Exported Aliases ---
+#' @rdname readGedcom.alpha
+#' @export
+readGed.alpha <- readGedcom.alpha
+#' @rdname readGedcom.alpha
+#' @export
+readgedcom.alpha <- readGedcom.alpha
diff --git a/man/compute_parent_adjacency.Rd b/man/compute_parent_adjacency.Rd
index 9cd4311c..21974673 100644
--- a/man/compute_parent_adjacency.Rd
+++ b/man/compute_parent_adjacency.Rd
@@ -7,7 +7,7 @@
compute_parent_adjacency(
ped,
component,
- adjacency_method = "indexed",
+ adjacency_method = "direct",
saveable,
resume,
save_path,
diff --git a/man/determineSex.Rd b/man/determineSex.Rd
index c98644f6..c1c096af 100644
--- a/man/determineSex.Rd
+++ b/man/determineSex.Rd
@@ -4,7 +4,7 @@
\alias{determineSex}
\title{Determine Sex of Offspring}
\usage{
-determineSex(idGen, sexR)
+determineSex(idGen, sexR, code_male = "M", code_female = "F")
}
\arguments{
\item{idGen}{Vector of IDs for the generation.}
diff --git a/man/ped2com.Rd b/man/ped2com.Rd
index 27f632f5..c47d5982 100644
--- a/man/ped2com.Rd
+++ b/man/ped2com.Rd
@@ -14,7 +14,7 @@ ped2com(
flatten.diag = FALSE,
standardize.colnames = TRUE,
transpose_method = "tcrossprod",
- adjacency_method = "indexed",
+ adjacency_method = "direct",
isChild_method = "classic",
saveable = FALSE,
resume = FALSE,
diff --git a/man/postProcessGedcom.Rd b/man/postProcessGedcom.Rd
new file mode 100644
index 00000000..9d0c7b2c
--- /dev/null
+++ b/man/postProcessGedcom.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/readGedcom.R
+\name{postProcessGedcom}
+\alias{postProcessGedcom}
+\title{Post-process GEDCOM Data Frame}
+\usage{
+postProcessGedcom(
+ df_temp,
+ remove_empty_cols = TRUE,
+ combine_cols = TRUE,
+ add_parents = TRUE,
+ skinny = TRUE,
+ verbose = FALSE
+)
+}
+\arguments{
+\item{df_temp}{A data frame containing information about individuals.}
+
+\item{remove_empty_cols}{A logical value indicating whether to remove columns with all missing values.}
+
+\item{combine_cols}{A logical value indicating whether to combine columns with duplicate values.}
+
+\item{add_parents}{A logical value indicating whether to add parents to the data frame.}
+
+\item{skinny}{A logical value indicating whether to return a skinny data frame.}
+
+\item{verbose}{A logical value indicating whether to print messages.}
+}
+\value{
+A data frame with processed information.
+}
+\description{
+Post-process GEDCOM Data Frame
+}
diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd
index 7bab49b1..a54cd5aa 100644
--- a/man/readGedcom.Rd
+++ b/man/readGedcom.Rd
@@ -14,6 +14,7 @@ readGedcom(
combine_cols = TRUE,
skinny = FALSE,
update_rate = 1000,
+ post_process = TRUE,
...
)
@@ -25,6 +26,7 @@ readGed(
combine_cols = TRUE,
skinny = FALSE,
update_rate = 1000,
+ post_process = TRUE,
...
)
@@ -36,6 +38,7 @@ readgedcom(
combine_cols = TRUE,
skinny = FALSE,
update_rate = 1000,
+ post_process = TRUE,
...
)
}
diff --git a/tests/testthat/test-readPedigrees_alpha.R b/tests/testthat/test-readPedigrees_alpha.R
new file mode 100644
index 00000000..4a5a4e98
--- /dev/null
+++ b/tests/testthat/test-readPedigrees_alpha.R
@@ -0,0 +1,231 @@
+test_that("readGedcom.alpha reads and parses a GEDCOM file correctly", {
+ # Create a temporary GEDCOM file for testing
+ gedcom_content <- c(
+ "0 HEAD",
+ "1 GEDC",
+ "2 VERS 5.5",
+ "2 FORM LINEAGE-LINKED",
+ "1 CHAR UTF-8",
+ "1 LANG English",
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 SEX M",
+ "1 BIRT",
+ "2 DATE 1 JAN 1900",
+ "2 PLAC Someplace",
+ "0 @I2@ INDI",
+ "1 NAME Jane /Smith/",
+ "1 SEX F",
+ "1 BIRT",
+ "2 DATE 2 FEB 1910",
+ "2 PLAC Anotherplace",
+ "1 NCHI 2"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ # Call readGedcom.alpha
+ df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = FALSE)
+ # note to self, the code is not reading in the 2nd person. and is also not reading in the birth date and place
+ # Check that the data frame has the expected structure
+ expect_true("id" %in% colnames(df))
+ expect_true("name_given" %in% colnames(df))
+ expect_true("name_surn" %in% colnames(df))
+ expect_true("sex" %in% colnames(df))
+ expect_true("birth_date" %in% colnames(df))
+ expect_true("birth_place" %in% colnames(df))
+
+ # Check the contents of the data frame
+ expect_equal(nrow(df), 2)
+ expect_equal(df$name_given[1], "John")
+ expect_equal(df$name_surn[1], "Doe")
+ expect_equal(df$sex[1], "M")
+ expect_equal(df$birth_date[1], "1 JAN 1900")
+ expect_equal(df$birth_place[1], "Someplace")
+ expect_equal(df$name_given[2], "Jane")
+ expect_equal(df$name_surn[2], "Smith")
+ expect_equal(df$sex[2], "F")
+ expect_equal(df$birth_date[2], "2 FEB 1910")
+ expect_equal(df$birth_place[2], "Anotherplace")
+
+ # Clean up temporary file
+ unlink(temp_file)
+})
+
+test_that("readGedcom.alpha combines duplicate columns correctly", {
+ # Create a temporary GEDCOM file for testing
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 GIVN John",
+ "1 SEX M",
+ "0 @I2@ INDI",
+ "1 NAME Jane /Smith/",
+ "1 GIVN Jane",
+ "1 SEX F"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ # Call readGedcom.alpha with combine_cols = TRUE
+ df <- readGedcom.alpha(temp_file, verbose = TRUE, combine_cols = TRUE)
+
+ # Check that the data frame has the expected structure
+ expect_true("name_given" %in% colnames(df))
+ expect_false("name_given_pieces" %in% colnames(df))
+
+ # Check the contents of the data frame
+ expect_equal(nrow(df), 2)
+ expect_equal(df$name_given[1], "John")
+ expect_equal(df$name_given[2], "Jane")
+
+ # Clean up temporary file
+ unlink(temp_file)
+})
+
+test_that("readGedcom.alpha removes empty columns correctly", {
+ # Create a temporary GEDCOM file for testing
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 SEX M"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ # Call readGedcom.alpha with remove_empty_cols = TRUE
+ df <- readGedcom.alpha(temp_file, verbose = TRUE, remove_empty_cols = TRUE)
+
+ # Check that empty columns are removed
+ expect_false("birth_date" %in% colnames(df))
+ expect_false("birth_place" %in% colnames(df))
+
+ # Clean up temporary file
+ unlink(temp_file)
+})
+
+test_that("readGedcom.alpha handles skinny option correctly", {
+ # Create a temporary GEDCOM file for testing
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 SEX M",
+ "1 FAMC @F1@",
+ "1 FAMS @F2@"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ # Call readGedcom.alpha with skinny = TRUE
+ df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = TRUE)
+
+ # Check that FAMC and FAMS columns are removed
+ expect_false("FAMC" %in% colnames(df))
+ expect_false("FAMS" %in% colnames(df))
+
+ # Clean up temporary file
+ unlink(temp_file)
+})
+
+test_that("processParents.alpha adds momID and dadID correctly", {
+ # Create a data frame for testing
+ df_temp <- data.frame(
+ id = c("I1", "I2", "I3"),
+ sex = c("M", "F", "M"),
+ FAMS = c("@F1@", "@F1@", NA),
+ FAMC = c(NA, NA, "@F1@"),
+ stringsAsFactors = FALSE
+ )
+
+ # Call processParents.alpha
+ df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
+
+ # Check the structure of the data frame
+ expect_true("momID" %in% colnames(df_temp))
+ expect_true("dadID" %in% colnames(df_temp))
+
+ # Check the contents of the data frame
+ expect_equal(df_temp$momID[1], NA_character_)
+ expect_equal(df_temp$dadID[1], NA_character_)
+ expect_equal(df_temp$momID[2], NA_character_)
+ expect_equal(df_temp$dadID[2], NA_character_)
+ expect_equal(df_temp$momID[3], "I2")
+ expect_equal(df_temp$dadID[3], "I1")
+
+ # Create a more complex data frame for testing
+ df_temp <- data.frame(
+ id = c("I1", "I2", "I3", "I4", "I5"),
+ sex = c("M", "F", "M", "F", "M"),
+ FAMS = c("@F1@", "@F1@", "@F2@", "@F2@", "@F3@"),
+ FAMC = c(NA, NA, "@F1@", "@F1@", "@F2@"),
+ stringsAsFactors = FALSE
+ )
+
+ # Call processParents.alpha
+ df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
+
+ # Check the contents of the data frame
+ expect_equal(df_temp$momID[3], "I2")
+ expect_equal(df_temp$dadID[3], "I1")
+ expect_equal(df_temp$momID[4], "I2")
+ expect_equal(df_temp$dadID[4], "I1")
+ expect_equal(df_temp$momID[5], "I4")
+ expect_equal(df_temp$dadID[5], "I3")
+})
+
+test_that("if file does not exist, readGedcom.alpha throws an error", {
+ # Call readGedcom.alpha with a non-existent file
+ expect_error(readGedcom.alpha("nonexistent.ged"))
+})
+
+test_that("readGedcom.alpha parses death event correctly", {
+ # Test that a GEDCOM file with a death event is parsed correctly.
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 SEX M",
+ "1 DEAT",
+ "2 DATE 31 DEC 2000",
+ "2 PLAC Lastplace",
+ "2 CAUS Old age",
+ "2 LATI 12.3456",
+ "2 LONG -65.4321"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ df <- readGedcom.alpha(temp_file, verbose = TRUE)
+
+ expect_true("death_date" %in% colnames(df))
+ expect_true("death_place" %in% colnames(df))
+ expect_true("death_caus" %in% colnames(df))
+ expect_true("death_lat" %in% colnames(df))
+ expect_true("death_long" %in% colnames(df))
+
+ expect_equal(df$death_date[1], "31 DEC 2000")
+ expect_equal(df$death_place[1], "Lastplace")
+ expect_equal(df$death_caus[1], "Old age")
+ expect_equal(df$death_lat[1], "12.3456")
+ expect_equal(df$death_long[1], "-65.4321")
+
+ unlink(temp_file)
+})
+
+test_that("readGedcom.alpha handles incomplete individual records gracefully", {
+ # Test that an individual record missing a NAME line is handled without error.
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 SEX M"
+ # No NAME or BIRT information.
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+
+ df <- readGedcom.alpha(temp_file, verbose = TRUE)
+
+ # Expect one record with missing name fields.
+ expect_equal(nrow(df), 1)
+ expect_true(is.null(df$name[1]))
+
+ unlink(temp_file)
+})
From 2541415bb113667cb2b7cedbd0a1e881a2077bfc Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 19:46:46 -0400
Subject: [PATCH 25/35] Update readGedcom.R
---
R/readGedcom.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/readGedcom.R b/R/readGedcom.R
index f271632c..8d800a66 100644
--- a/R/readGedcom.R
+++ b/R/readGedcom.R
@@ -48,7 +48,7 @@
#' - `FAMC`: ID(s) of the family where the individual is a child
#' - `FAMS`: ID(s) of the family where the individual is a spouse
#' @export
-readGedcom <- function(file_path,
+readGedcom.legacy <- function(file_path,
verbose = FALSE,
add_parents = TRUE,
remove_empty_cols = TRUE,
From 65affa3dd2f09262798d3632cf5b1b591c5f5b04 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 19:51:32 -0400
Subject: [PATCH 26/35] rename
---
R/{readGedcom.R => readGedcomlegacy.R} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename R/{readGedcom.R => readGedcomlegacy.R} (100%)
diff --git a/R/readGedcom.R b/R/readGedcomlegacy.R
similarity index 100%
rename from R/readGedcom.R
rename to R/readGedcomlegacy.R
From 1ee34bab6060cf75177645a8a75edc9e70082526 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 19:51:58 -0400
Subject: [PATCH 27/35] rename
---
R/{readGedcom_alpha.R => readGedcom.R} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename R/{readGedcom_alpha.R => readGedcom.R} (100%)
diff --git a/R/readGedcom_alpha.R b/R/readGedcom.R
similarity index 100%
rename from R/readGedcom_alpha.R
rename to R/readGedcom.R
From 294186ba69f2a6c1d3e23e9af4792071ee48d527 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 19:59:28 -0400
Subject: [PATCH 28/35] Update readGedcomlegacy.R
---
R/readGedcomlegacy.R | 84 ++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 45 deletions(-)
diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R
index 8d800a66..b8697908 100644
--- a/R/readGedcomlegacy.R
+++ b/R/readGedcomlegacy.R
@@ -47,7 +47,7 @@
#' - `attribute_title`: Title of the individual
#' - `FAMC`: ID(s) of the family where the individual is a child
#' - `FAMS`: ID(s) of the family where the individual is a spouse
-#' @export
+#' @internal
readGedcom.legacy <- function(file_path,
verbose = FALSE,
add_parents = TRUE,
@@ -69,7 +69,7 @@ readGedcom.legacy <- function(file_path,
}
# Count the number of rows containing specific patterns
- num_rows <- countPatternRows(file)
+ num_rows <- countPatternRows.legacy(file)
# List of variables to initialize
var_names <- list(
@@ -128,51 +128,51 @@ readGedcom.legacy <- function(file_path,
# names
if (num_rows$num_name_rows > 0 && grepl(" NAME", tmpv)) {
- vars$name <- extract_info(tmpv, "NAME")
+ vars$name <- extract_info.legacy(tmpv, "NAME")
vars$name_given <- stringr::str_extract(vars$name, ".*(?= /)")
vars$name_surn <- stringr::str_extract(vars$name, "(?<=/).*(?=/)")
vars$name <- stringr::str_squish(stringr::str_replace(vars$name, "/", " "))
next
}
# PERSONAL_NAME_PIECES := NAME | NPFX | GIVN | NICK | SPFX | SURN | NSFX
- result <- process_tag("GIVN", "name_given_pieces", num_rows, tmpv, vars)
+ result <- process_tag.legacy("GIVN", "name_given_pieces", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
# npfx := Name Prefix
- result <- process_tag("NPFX", "name_npfx", num_rows, tmpv, vars)
+ result <- process_tag.legacy("NPFX", "name_npfx", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
# NICK := Nickname
- result <- process_tag("NICK", "name_nick", num_rows, tmpv, vars)
+ result <- process_tag.legacy("NICK", "name_nick", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
# surn := Surname
- result <- process_tag("SURN", "name_surn_pieces", num_rows, tmpv, vars)
+ result <- process_tag.legacy("SURN", "name_surn_pieces", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
# nsfx := Name suffix
- result <- process_tag("NSFX", "name_nsfx", num_rows, tmpv, vars)
+ result <- process_tag.legacy("NSFX", "name_nsfx", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
- result <- process_tag("_MARNM", "name_marriedsurn", num_rows, tmpv, vars)
+ result <- process_tag.legacy("_MARNM", "name_marriedsurn", num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
# Birth event related information
if (num_rows$num_birt_rows > 0 && grepl(" BIRT", tmpv)) {
if (num_rows$num_date_rows > 0 && i + 1 <= file_length) {
- vars$birth_date <- extract_info(file[1][[1]][[i + 1]], "DATE")
+ vars$birth_date <- extract_info.legacy(file[1][[1]][[i + 1]], "DATE")
if (num_rows$num_plac_rows > 0 && i + 2 <= file_length) {
- vars$birth_place <- extract_info(file[1][[1]][[i + 2]], "PLAC")
+ vars$birth_place <- extract_info.legacy(file[1][[1]][[i + 2]], "PLAC")
if (num_rows$num_lati_rows > 0 && i + 4 <= file_length) {
- vars$birth_lat <- extract_info(file[1][[1]][[i + 4]], "LATI")
+ vars$birth_lat <- extract_info.legacy(file[1][[1]][[i + 4]], "LATI")
if (num_rows$num_long_rows > 0 && i + 5 <= file_length) {
- vars$birth_long <- extract_info(file[1][[1]][[i + 5]], "LONG")
+ vars$birth_long <- extract_info.legacy(file[1][[1]][[i + 5]], "LONG")
}
}
}
@@ -184,15 +184,15 @@ readGedcom.legacy <- function(file_path,
# the ifs are nested so that there is no need to check if you've already run out of
if (num_rows$num_deat_rows > 0 && grepl(" DEAT", tmpv)) {
if (num_rows$num_date_rows > 0 && i + 1 <= file_length) {
- vars$death_date <- extract_info(file[1][[1]][[i + 1]], "DATE")
+ vars$death_date <- extract_info.legacy(file[1][[1]][[i + 1]], "DATE")
if (num_rows$num_plac_rows > 0 && i + 2 <= file_length) {
- vars$death_place <- extract_info(file[1][[1]][[i + 2]], "PLAC")
+ vars$death_place <- extract_info.legacy(file[1][[1]][[i + 2]], "PLAC")
if (num_rows$num_caus_rows > 0 && i + 3 <= file_length) {
- vars$death_caus <- extract_info(file[1][[1]][[i + 3]], "CAUS")
+ vars$death_caus <- extract_info.legacy(file[1][[1]][[i + 3]], "CAUS")
if (num_rows$num_lati_rows > 0 && i + 4 <= file_length) {
- vars$death_lat <- extract_info(file[1][[1]][[i + 4]], "LATI")
+ vars$death_lat <- extract_info.legacy(file[1][[1]][[i + 4]], "LATI")
if (num_rows$num_long_rows > 0 && i + 5 <= file_length) {
- vars$death_long <- extract_info(file[1][[1]][[i + 5]], "LONG")
+ vars$death_long <- extract_info.legacy(file[1][[1]][[i + 5]], "LONG")
}
}
}
@@ -258,7 +258,7 @@ readGedcom.legacy <- function(file_path,
# g7:INDI-TITL A formal designation used by an individual in connection with positions of royalty or other social status, such as Grand Duke.
c("TITL", "attribute_title")
)) {
- result <- process_tag(tag_field[1], tag_field[2], num_rows, tmpv, vars)
+ result <- process_tag.legacy(tag_field[1], tag_field[2], num_rows, tmpv, vars)
vars <- result$vars
if (result$matched) next
}
@@ -266,7 +266,7 @@ readGedcom.legacy <- function(file_path,
# relationship data
# g7:INDI-FAMC
## The family in which an individual appears as a child. It is also used with a g7:FAMC-STAT substructure to show individuals who are not children of the family. See FAMILY_RECORD for more details.
- result <- process_tag("FAMC", "FAMC", num_rows, tmpv, vars,
+ result <- process_tag.legacy("FAMC", "FAMC", num_rows, tmpv, vars,
extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"),
mode = "append"
)
@@ -275,7 +275,7 @@ readGedcom.legacy <- function(file_path,
# FAMS (Family spouse) g7:FAMS
# The family in which an individual appears as a partner. See FAMILY_RECORD for more details.
- result <- process_tag("FAMS", "FAMS", num_rows, tmpv, vars,
+ result <- process_tag.legacy("FAMS", "FAMS", num_rows, tmpv, vars,
extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"),
mode = "append"
)
@@ -307,7 +307,7 @@ readGedcom.legacy <- function(file_path,
print("Post-processing data frame")
}
# Remove the first row (empty)
-df_temp <- postProcessGedcom(
+df_temp <- postProcessGedcom.legacy(
df_temp = df_temp,
remove_empty_cols = remove_empty_cols,
combine_cols = combine_cols,
@@ -323,11 +323,11 @@ df_temp <- postProcessGedcom(
#' Post-process GEDCOM Data Frame
#'
-#' @inheritParams readGedcom
-#' @inheritParams mapFAMS2parents
+#' @inheritParams readGedcom.legacy
+#' @inheritParams mapFAMS2parents.legacy
#' @return A data frame with processed information.
-postProcessGedcom <- function(df_temp,
+postProcessGedcom.legacy <- function(df_temp,
remove_empty_cols = TRUE,
combine_cols = TRUE,
add_parents = TRUE,
@@ -339,11 +339,11 @@ postProcessGedcom <- function(df_temp,
if (verbose) {
print("Processing parents")
}
- df_temp <- processParents(df_temp, datasource = "gedcom")
+ df_temp <- processParents.legacy(df_temp, datasource = "gedcom")
}
if (combine_cols) {
- df_temp <- collapseNames(verbose = verbose, df_temp = df_temp)
+ df_temp <- collapseNames.legacy(verbose = verbose, df_temp = df_temp)
}
if (remove_empty_cols) {
@@ -374,7 +374,7 @@ return(df_temp)
#' @return A list mapping family IDs to parent IDs.
#' @keywords internal
#'
-mapFAMS2parents <- function(df_temp) {
+mapFAMS2parents.legacy <- function(df_temp) {
if (!all(c("FAMS", "sex") %in% colnames(df_temp))) {
warning("The data frame does not contain the necessary columns (FAMS, sex)")
return(NULL)
@@ -413,7 +413,7 @@ mapFAMS2parents <- function(df_temp) {
#' @param family_to_parents A list mapping family IDs to parent IDs.
#' @return A data frame with added momID and dad_ID columns.
#' @keywords internal
-mapFAMC2parents <- function(df_temp, family_to_parents) {
+mapFAMC2parents.legacy <- function(df_temp, family_to_parents) {
df_temp$momID <- NA_character_
df_temp$dadID <- NA_character_
for (i in 1:nrow(df_temp)) {
@@ -441,7 +441,7 @@ mapFAMC2parents <- function(df_temp, family_to_parents) {
#' @param df_temp A data frame containing information about individuals.
#' @return A data frame with added momID and dadID columns.
#' @keywords internal
-processParents <- function(df_temp, datasource) {
+processParents.legacy <- function(df_temp, datasource) {
# Ensure required columns are present
if (datasource == "gedcom") {
required_cols <- c("FAMC", "sex", "FAMS")
@@ -457,11 +457,11 @@ processParents <- function(df_temp, datasource) {
return(df_temp)
}
- family_to_parents <- mapFAMS2parents(df_temp)
+ family_to_parents <- mapFAMS2parents.legacy(df_temp)
if (is.null(family_to_parents) || length(family_to_parents) == 0) {
return(df_temp)
}
- df_temp <- mapFAMC2parents(df_temp, family_to_parents)
+ df_temp <- mapFAMC2parents.legacy(df_temp, family_to_parents)
return(df_temp)
}
@@ -474,7 +474,7 @@ processParents <- function(df_temp, datasource) {
#' @param type A character string representing the type of information to extract.
#' @return A character string with the extracted information.
#' @keywords internal
-extract_info <- function(line, type) {
+extract_info.legacy <- function(line, type) {
stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+")))
}
@@ -509,7 +509,7 @@ combine_columns <- function(col1, col2) {
#' @return A list with the number of rows containing each pattern.
#' @keywords internal
#'
-countPatternRows <- function(file) {
+countPatternRows.legacy <- function(file) {
# Count the number of rows containing specific patterns
pattern_counts <- sapply(
c(
@@ -569,14 +569,14 @@ countPatternRows <- function(file) {
#' @return A list with updated `vars` and a `matched` flag.
#' @keywords internal
#'
-process_tag <- function(tag, field_name, pattern_rows, line, vars,
+process_tag.legacy <- function(tag, field_name, pattern_rows, line, vars,
extractor = NULL, mode = "replace") {
count_name <- paste0("num_", tolower(tag), "_rows")
matched <- FALSE
if (!is.null(pattern_rows[[count_name]]) &&
pattern_rows[[count_name]] > 0 &&
grepl(paste0(" ", tag), line)) {
- value <- if (is.null(extractor)) extract_info(line, tag) else extractor(line)
+ value <- if (is.null(extractor)) extract_info.legacy(line, tag) else extractor(line)
if (mode == "append" && !is.na(vars[[field_name]])) {
vars[[field_name]] <- paste0(vars[[field_name]], ", ", value)
@@ -593,10 +593,10 @@ process_tag <- function(tag, field_name, pattern_rows, line, vars,
#'
#' This function combines the `name_given` and `name_given_pieces` columns in a data frame.
#'
-#' @inheritParams readGedcom
+#' @inheritParams readGedcom.legacy
#' @param df_temp A data frame containing the columns to be combined.
-
-collapseNames <- function(verbose, df_temp) {
+#' @keywords internal
+collapseNames.legacy <- function(verbose, df_temp) {
if (verbose) {
print("Combining Duplicate Columns")
}
@@ -622,10 +622,4 @@ collapseNames <- function(verbose, df_temp) {
}
return(df_temp)
}
-#' @rdname readGedcom
-#' @export
-readGed <- readGedcom
-#' @rdname readGedcom
-#' @export
-readgedcom <- readGedcom
From 8244984df4aead2370965479be4bc3ea7afedf7f Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 20:05:14 -0400
Subject: [PATCH 29/35] updating tests
---
tests/testthat/test-readPedigrees.R | 73 ++++---
tests/testthat/test-readPedigrees_alpha.R | 245 +++-------------------
2 files changed, 70 insertions(+), 248 deletions(-)
diff --git a/tests/testthat/test-readPedigrees.R b/tests/testthat/test-readPedigrees.R
index a6d6bdd0..b48bf74a 100644
--- a/tests/testthat/test-readPedigrees.R
+++ b/tests/testthat/test-readPedigrees.R
@@ -180,44 +180,55 @@ test_that("if file does not exist, readGedcom throws an error", {
-# readWikifamilytree
-
-test_that("readWikifamilytree reads a string correctly", {
- # Create a temporary WikiFamilyTree file for testing
- # Example usage
- family_tree_text <- "{{familytree/start |summary=I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy.}}
-{{familytree | | | | GMa |~|y|~| GPa | | GMa=Gladys|GPa=Sydney}}
-{{familytree | | | | | | | |)|-|-|-|.| }}
-{{familytree | | | MOM |y| DAD | |DAISY| MOM=Mom|DAD=Dad|DAISY=[[Daisy Duke]]}}
-{{familytree | |,|-|-|-|+|-|-|-|.| | | }}
-{{familytree | JOE | | ME | | SIS | | | JOE=My brother Joe|ME='''Me!'''|SIS=My little sister}}
-{{familytree/end}}"
+test_that("readGedcom parses death event correctly", {
+ # Test that a GEDCOM file with a death event is parsed correctly.
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 NAME John /Doe/",
+ "1 SEX M",
+ "1 DEAT",
+ "2 DATE 31 DEC 2000",
+ "2 PLAC Lastplace",
+ "2 CAUS Old age",
+ "2 LATI 12.3456",
+ "2 LONG -65.4321"
+ )
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
- temp_file <- tempfile(fileext = ".txt")
- writeLines(family_tree_text, temp_file)
+ df <- readGedcom(temp_file, verbose = TRUE)
+ expect_true("death_date" %in% colnames(df))
+ expect_true("death_place" %in% colnames(df))
+ expect_true("death_caus" %in% colnames(df))
+ expect_true("death_lat" %in% colnames(df))
+ expect_true("death_long" %in% colnames(df))
- result <- readWikifamilytree(text = family_tree_text)
- result2 <- readWikifamilytree(file_path = temp_file)
+ expect_equal(df$death_date[1], "31 DEC 2000")
+ expect_equal(df$death_place[1], "Lastplace")
+ expect_equal(df$death_caus[1], "Old age")
+ expect_equal(df$death_lat[1], "12.3456")
+ expect_equal(df$death_long[1], "-65.4321")
- expect_equal(
- result$summary,
- "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy."
- )
+ unlink(temp_file)
+})
- expect_equal(
- result2$summary,
- "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy."
+test_that("readGedcom handles incomplete individual records gracefully", {
+ # Test that an individual record missing a NAME line is handled without error.
+ gedcom_content <- c(
+ "0 @I1@ INDI",
+ "1 SEX M"
+ # No NAME or BIRT information.
)
-})
+ temp_file <- tempfile(fileext = ".ged")
+ writeLines(gedcom_content, temp_file)
+ df <- readGedcom(temp_file, verbose = TRUE)
-# read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt
+ # Expect one record with missing name fields.
+ expect_equal(nrow(df), 1)
+ expect_true(is.null(df$name[1]))
-# test_that("readWikifamilytree reads a file correctly", {
-# Create a temporary WikiFamilyTree file for testing
-# Example usage
-# family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc")
+ unlink(temp_file)
+})
-# result <- readWikifamilytree(file_path=family_tree_file_path)
-# })
diff --git a/tests/testthat/test-readPedigrees_alpha.R b/tests/testthat/test-readPedigrees_alpha.R
index 4a5a4e98..d73e5810 100644
--- a/tests/testthat/test-readPedigrees_alpha.R
+++ b/tests/testthat/test-readPedigrees_alpha.R
@@ -1,231 +1,42 @@
-test_that("readGedcom.alpha reads and parses a GEDCOM file correctly", {
- # Create a temporary GEDCOM file for testing
- gedcom_content <- c(
- "0 HEAD",
- "1 GEDC",
- "2 VERS 5.5",
- "2 FORM LINEAGE-LINKED",
- "1 CHAR UTF-8",
- "1 LANG English",
- "0 @I1@ INDI",
- "1 NAME John /Doe/",
- "1 SEX M",
- "1 BIRT",
- "2 DATE 1 JAN 1900",
- "2 PLAC Someplace",
- "0 @I2@ INDI",
- "1 NAME Jane /Smith/",
- "1 SEX F",
- "1 BIRT",
- "2 DATE 2 FEB 1910",
- "2 PLAC Anotherplace",
- "1 NCHI 2"
- )
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
-
- # Call readGedcom.alpha
- df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = FALSE)
- # note to self, the code is not reading in the 2nd person. and is also not reading in the birth date and place
- # Check that the data frame has the expected structure
- expect_true("id" %in% colnames(df))
- expect_true("name_given" %in% colnames(df))
- expect_true("name_surn" %in% colnames(df))
- expect_true("sex" %in% colnames(df))
- expect_true("birth_date" %in% colnames(df))
- expect_true("birth_place" %in% colnames(df))
- # Check the contents of the data frame
- expect_equal(nrow(df), 2)
- expect_equal(df$name_given[1], "John")
- expect_equal(df$name_surn[1], "Doe")
- expect_equal(df$sex[1], "M")
- expect_equal(df$birth_date[1], "1 JAN 1900")
- expect_equal(df$birth_place[1], "Someplace")
- expect_equal(df$name_given[2], "Jane")
- expect_equal(df$name_surn[2], "Smith")
- expect_equal(df$sex[2], "F")
- expect_equal(df$birth_date[2], "2 FEB 1910")
- expect_equal(df$birth_place[2], "Anotherplace")
+# readWikifamilytree
- # Clean up temporary file
- unlink(temp_file)
-})
-
-test_that("readGedcom.alpha combines duplicate columns correctly", {
- # Create a temporary GEDCOM file for testing
- gedcom_content <- c(
- "0 @I1@ INDI",
- "1 NAME John /Doe/",
- "1 GIVN John",
- "1 SEX M",
- "0 @I2@ INDI",
- "1 NAME Jane /Smith/",
- "1 GIVN Jane",
- "1 SEX F"
- )
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
+test_that("readWikifamilytree reads a string correctly", {
+ # Create a temporary WikiFamilyTree file for testing
+ # Example usage
+ family_tree_text <- "{{familytree/start |summary=I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy.}}
+{{familytree | | | | GMa |~|y|~| GPa | | GMa=Gladys|GPa=Sydney}}
+{{familytree | | | | | | | |)|-|-|-|.| }}
+{{familytree | | | MOM |y| DAD | |DAISY| MOM=Mom|DAD=Dad|DAISY=[[Daisy Duke]]}}
+{{familytree | |,|-|-|-|+|-|-|-|.| | | }}
+{{familytree | JOE | | ME | | SIS | | | JOE=My brother Joe|ME='''Me!'''|SIS=My little sister}}
+{{familytree/end}}"
- # Call readGedcom.alpha with combine_cols = TRUE
- df <- readGedcom.alpha(temp_file, verbose = TRUE, combine_cols = TRUE)
+ temp_file <- tempfile(fileext = ".txt")
+ writeLines(family_tree_text, temp_file)
- # Check that the data frame has the expected structure
- expect_true("name_given" %in% colnames(df))
- expect_false("name_given_pieces" %in% colnames(df))
- # Check the contents of the data frame
- expect_equal(nrow(df), 2)
- expect_equal(df$name_given[1], "John")
- expect_equal(df$name_given[2], "Jane")
+ result <- readWikifamilytree(text = family_tree_text)
+ result2 <- readWikifamilytree(file_path = temp_file)
- # Clean up temporary file
- unlink(temp_file)
-})
-
-test_that("readGedcom.alpha removes empty columns correctly", {
- # Create a temporary GEDCOM file for testing
- gedcom_content <- c(
- "0 @I1@ INDI",
- "1 NAME John /Doe/",
- "1 SEX M"
+ expect_equal(
+ result$summary,
+ "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy."
)
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
-
- # Call readGedcom.alpha with remove_empty_cols = TRUE
- df <- readGedcom.alpha(temp_file, verbose = TRUE, remove_empty_cols = TRUE)
-
- # Check that empty columns are removed
- expect_false("birth_date" %in% colnames(df))
- expect_false("birth_place" %in% colnames(df))
- # Clean up temporary file
- unlink(temp_file)
-})
-
-test_that("readGedcom.alpha handles skinny option correctly", {
- # Create a temporary GEDCOM file for testing
- gedcom_content <- c(
- "0 @I1@ INDI",
- "1 NAME John /Doe/",
- "1 SEX M",
- "1 FAMC @F1@",
- "1 FAMS @F2@"
+ expect_equal(
+ result2$summary,
+ "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy."
)
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
-
- # Call readGedcom.alpha with skinny = TRUE
- df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = TRUE)
-
- # Check that FAMC and FAMS columns are removed
- expect_false("FAMC" %in% colnames(df))
- expect_false("FAMS" %in% colnames(df))
-
- # Clean up temporary file
- unlink(temp_file)
})
-test_that("processParents.alpha adds momID and dadID correctly", {
- # Create a data frame for testing
- df_temp <- data.frame(
- id = c("I1", "I2", "I3"),
- sex = c("M", "F", "M"),
- FAMS = c("@F1@", "@F1@", NA),
- FAMC = c(NA, NA, "@F1@"),
- stringsAsFactors = FALSE
- )
-
- # Call processParents.alpha
- df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
- # Check the structure of the data frame
- expect_true("momID" %in% colnames(df_temp))
- expect_true("dadID" %in% colnames(df_temp))
+# read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt
- # Check the contents of the data frame
- expect_equal(df_temp$momID[1], NA_character_)
- expect_equal(df_temp$dadID[1], NA_character_)
- expect_equal(df_temp$momID[2], NA_character_)
- expect_equal(df_temp$dadID[2], NA_character_)
- expect_equal(df_temp$momID[3], "I2")
- expect_equal(df_temp$dadID[3], "I1")
-
- # Create a more complex data frame for testing
- df_temp <- data.frame(
- id = c("I1", "I2", "I3", "I4", "I5"),
- sex = c("M", "F", "M", "F", "M"),
- FAMS = c("@F1@", "@F1@", "@F2@", "@F2@", "@F3@"),
- FAMC = c(NA, NA, "@F1@", "@F1@", "@F2@"),
- stringsAsFactors = FALSE
- )
+# test_that("readWikifamilytree reads a file correctly", {
+# Create a temporary WikiFamilyTree file for testing
+# Example usage
+# family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc")
- # Call processParents.alpha
- df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
-
- # Check the contents of the data frame
- expect_equal(df_temp$momID[3], "I2")
- expect_equal(df_temp$dadID[3], "I1")
- expect_equal(df_temp$momID[4], "I2")
- expect_equal(df_temp$dadID[4], "I1")
- expect_equal(df_temp$momID[5], "I4")
- expect_equal(df_temp$dadID[5], "I3")
-})
-
-test_that("if file does not exist, readGedcom.alpha throws an error", {
- # Call readGedcom.alpha with a non-existent file
- expect_error(readGedcom.alpha("nonexistent.ged"))
-})
-
-test_that("readGedcom.alpha parses death event correctly", {
- # Test that a GEDCOM file with a death event is parsed correctly.
- gedcom_content <- c(
- "0 @I1@ INDI",
- "1 NAME John /Doe/",
- "1 SEX M",
- "1 DEAT",
- "2 DATE 31 DEC 2000",
- "2 PLAC Lastplace",
- "2 CAUS Old age",
- "2 LATI 12.3456",
- "2 LONG -65.4321"
- )
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
-
- df <- readGedcom.alpha(temp_file, verbose = TRUE)
-
- expect_true("death_date" %in% colnames(df))
- expect_true("death_place" %in% colnames(df))
- expect_true("death_caus" %in% colnames(df))
- expect_true("death_lat" %in% colnames(df))
- expect_true("death_long" %in% colnames(df))
-
- expect_equal(df$death_date[1], "31 DEC 2000")
- expect_equal(df$death_place[1], "Lastplace")
- expect_equal(df$death_caus[1], "Old age")
- expect_equal(df$death_lat[1], "12.3456")
- expect_equal(df$death_long[1], "-65.4321")
-
- unlink(temp_file)
-})
-
-test_that("readGedcom.alpha handles incomplete individual records gracefully", {
- # Test that an individual record missing a NAME line is handled without error.
- gedcom_content <- c(
- "0 @I1@ INDI",
- "1 SEX M"
- # No NAME or BIRT information.
- )
- temp_file <- tempfile(fileext = ".ged")
- writeLines(gedcom_content, temp_file)
-
- df <- readGedcom.alpha(temp_file, verbose = TRUE)
-
- # Expect one record with missing name fields.
- expect_equal(nrow(df), 1)
- expect_true(is.null(df$name[1]))
-
- unlink(temp_file)
-})
+# result <- readWikifamilytree(file_path=family_tree_file_path)
+# })
From ff6947316bc14253ec2b2633904745f718058092 Mon Sep 17 00:00:00 2001
From: Mason Garrison
Date: Mon, 14 Apr 2025 20:07:02 -0400
Subject: [PATCH 30/35] rename
---
R/readGedcom.R | 98 +++++++++---------
R/readGedcomlegacy.R | 2 +-
data-raw/benchged.R | 31 ++++++
data/royal92.rda | Bin 69068 -> 67612 bytes
man/applyTagMappings.Rd | 27 +++++
man/collapseNames.Rd | 3 +
man/collapseNames.legacy.Rd | 17 +++
man/combine_columns.Rd | 8 +-
man/countPatternRows.Rd | 10 +-
man/countPatternRows.legacy.Rd | 18 ++++
man/extract_info.legacy.Rd | 20 ++++
man/initializeRecord.Rd | 17 +++
man/mapFAMC2parents.legacy.Rd | 21 ++++
man/mapFAMS2parents.Rd | 10 +-
man/mapFAMS2parents.legacy.Rd | 18 ++++
man/parseIndividualBlock.Rd | 24 +++++
man/parseNameLine.Rd | 19 ++++
man/postProcessGedcom.Rd | 17 +--
man/postProcessGedcom.legacy.Rd | 34 ++++++
man/processEventLine.Rd | 27 +++++
man/processParents.Rd | 11 +-
man/processParents.legacy.Rd | 18 ++++
man/process_tag.legacy.Rd | 35 +++++++
man/readGedcom.Rd | 1 -
man/readGedcom.legacy.Rd | 78 ++++++++++++++
man/splitIndividuals.Rd | 20 ++++
...dPedigrees_alpha.R => test-readWikiTree.R} | 0
27 files changed, 509 insertions(+), 75 deletions(-)
create mode 100644 data-raw/benchged.R
create mode 100644 man/applyTagMappings.Rd
create mode 100644 man/collapseNames.legacy.Rd
create mode 100644 man/countPatternRows.legacy.Rd
create mode 100644 man/extract_info.legacy.Rd
create mode 100644 man/initializeRecord.Rd
create mode 100644 man/mapFAMC2parents.legacy.Rd
create mode 100644 man/mapFAMS2parents.legacy.Rd
create mode 100644 man/parseIndividualBlock.Rd
create mode 100644 man/parseNameLine.Rd
create mode 100644 man/postProcessGedcom.legacy.Rd
create mode 100644 man/processEventLine.Rd
create mode 100644 man/processParents.legacy.Rd
create mode 100644 man/process_tag.legacy.Rd
create mode 100644 man/readGedcom.legacy.Rd
create mode 100644 man/splitIndividuals.Rd
rename tests/testthat/{test-readPedigrees_alpha.R => test-readWikiTree.R} (100%)
diff --git a/R/readGedcom.R b/R/readGedcom.R
index 3cde78dc..109377e1 100644
--- a/R/readGedcom.R
+++ b/R/readGedcom.R
@@ -47,7 +47,7 @@
#' - `FAMC`: ID(s) of the family where the individual is a child
#' - `FAMS`: ID(s) of the family where the individual is a spouse
#' @export
-readGedcom.alpha <- function(file_path,
+readGedcom <- function(file_path,
verbose = FALSE,
add_parents = TRUE,
remove_empty_cols = TRUE,
@@ -67,7 +67,7 @@ readGedcom.alpha <- function(file_path,
if (verbose) message("File is ", total_lines, " lines long")
# Count pattern occurrences (pattern_rows remains used in subfunctions)
- pattern_rows <- countPatternRows.alpha(data.frame(X1 = lines))
+ pattern_rows <- countPatternRows(data.frame(X1 = lines))
# List of variables to initialize
all_var_names <- unlist(list(
@@ -85,10 +85,10 @@ readGedcom.alpha <- function(file_path,
), use.names = FALSE)
# Split the file into blocks; each block corresponds to one individual.
- blocks <- splitIndividuals.alpha(lines, verbose)
+ blocks <- splitIndividuals(lines, verbose)
# Parse each individual block into a record (a named list)
- records <- lapply(blocks, parseIndividualBlock.alpha,
+ records <- lapply(blocks, parseIndividualBlock,
pattern_rows = pattern_rows,
all_var_names = all_var_names, verbose = verbose)
@@ -110,7 +110,7 @@ readGedcom.alpha <- function(file_path,
# Run post-processing if requested.
if (post_process) {
if (verbose) message("Post-processing data frame")
- df_temp <- postProcessGedcom.alpha(
+ df_temp <- postProcessGedcom(
df_temp = df_temp,
remove_empty_cols = remove_empty_cols,
combine_cols = combine_cols,
@@ -132,7 +132,7 @@ readGedcom.alpha <- function(file_path,
#' @param lines A character vector of lines from the GEDCOM file.
#' @param verbose Logical indicating whether to output progress messages.
#' @return A list of character vectors, each representing one individual.
-splitIndividuals.alpha <- function(lines, verbose = FALSE) {
+splitIndividuals <- function(lines, verbose = FALSE) {
indi_idx <- grep("@ INDI", lines)
if (length(indi_idx) == 0) return(list())
@@ -153,7 +153,7 @@ splitIndividuals.alpha <- function(lines, verbose = FALSE) {
#'
#' @param all_var_names A character vector of variable names.
#' @return A named list representing an empty individual record.
-initializeRecord.alpha <- function(all_var_names) {
+initializeRecord <- function(all_var_names) {
setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names)
}
@@ -167,8 +167,8 @@ initializeRecord.alpha <- function(all_var_names) {
#' @param verbose Logical indicating whether to print progress messages.
#' @return A named list representing the parsed record for the individual, or NULL if no ID is found.
#' @keywords internal
-parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbose = FALSE) {
- record <- initializeRecord.alpha(all_var_names)
+parseIndividualBlock <- function(block, pattern_rows, all_var_names, verbose = FALSE) {
+ record <- initializeRecord(all_var_names)
n_lines <- length(block)
# Loop through the block by index so that we can look ahead for event details.
@@ -185,19 +185,19 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo
# Special processing for full name using " NAME" tag.
if (grepl(" NAME", line) && pattern_rows$num_name_rows > 0) {
- record <- parseNameLine.alpha(line, record)
+ record <- parseNameLine(line, record)
i <- i + 1
next
}
# Process birth and death events by consuming multiple lines.
if (grepl(" BIRT", line) && pattern_rows$num_birt_rows > 0) {
- record <- processEventLine.alpha("birth", block, i, record, pattern_rows)
+ record <- processEventLine("birth", block, i, record, pattern_rows)
i <- i + 1 # Skip further processing of this line.
next
}
if (grepl(" DEAT", line) && pattern_rows$num_deat_rows > 0) {
- record <- processEventLine.alpha("death", block, i, record, pattern_rows)
+ record <- processEventLine("death", block, i, record, pattern_rows)
i <- i + 1
next
}
@@ -212,7 +212,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo
list(tag = "NSFX", field = "name_nsfx", mode = "replace"),
list(tag = "_MARNM", field = "name_marriedsurn", mode = "replace")
)
- out <- applyTagMappings.alpha(line, record, pattern_rows, name_piece_mappings)
+ out <- applyTagMappings(line, record, pattern_rows, name_piece_mappings)
if (out$matched) { record <- out$record
i <- i + 1
next }
@@ -234,7 +234,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo
list(tag = "SSN", field = "attribute_ssn", mode = "replace"),
list(tag = "TITL", field = "attribute_title", mode = "replace")
)
- out <- applyTagMappings.alpha(line, record, pattern_rows, attribute_mappings)
+ out <- applyTagMappings(line, record, pattern_rows, attribute_mappings)
if (out$matched) { record <- out$record
i <- i + 1
next }
@@ -246,7 +246,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo
list(tag = "FAMS", field = "FAMS", mode = "append",
extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"))
)
- out <- applyTagMappings.alpha(line, record, pattern_rows, relationship_mappings)
+ out <- applyTagMappings(line, record, pattern_rows, relationship_mappings)
if (out$matched) { record <- out$record
i <- i + 1
next }
@@ -267,8 +267,8 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo
#' @param line A character string containing the name line.
#' @param record A named list representing the individual's record.
#' @return The updated record with parsed name information.
-parseNameLine.alpha <- function(line, record) {
- record$name <- extract_info.alpha(line, "NAME")
+parseNameLine <- function(line, record) {
+ record$name <- extract_info(line, "NAME")
record$name_given <- stringr::str_extract(record$name, ".*(?= /)")
record$name_surn <- stringr::str_extract(record$name, "(?<=/).*(?=/)")
record$name <- stringr::str_squish(stringr::str_replace(record$name, "/", " "))
@@ -287,19 +287,19 @@ parseNameLine.alpha <- function(line, record) {
#' @param pattern_rows A list with counts of GEDCOM tag occurrences.
#' @return The updated record with parsed event information.#
# For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5.
-processEventLine.alpha <- function(event, block, i, record, pattern_rows) {
+processEventLine <- function(event, block, i, record, pattern_rows) {
n_lines <- length(block)
if (event == "birth") {
- if (i + 1 <= n_lines) record$birth_date <- extract_info.alpha(block[i+1], "DATE")
- if (i + 2 <= n_lines) record$birth_place <- extract_info.alpha(block[i+2], "PLAC")
- if (i + 4 <= n_lines) record$birth_lat <- extract_info.alpha(block[i+4], "LATI")
- if (i + 5 <= n_lines) record$birth_long <- extract_info.alpha(block[i+5], "LONG")
+ if (i + 1 <= n_lines) record$birth_date <- extract_info(block[i+1], "DATE")
+ if (i + 2 <= n_lines) record$birth_place <- extract_info(block[i+2], "PLAC")
+ if (i + 4 <= n_lines) record$birth_lat <- extract_info(block[i+4], "LATI")
+ if (i + 5 <= n_lines) record$birth_long <- extract_info(block[i+5], "LONG")
} else if (event == "death") {
- if (i + 1 <= n_lines) record$death_date <- extract_info.alpha(block[i+1], "DATE")
- if (i + 2 <= n_lines) record$death_place <- extract_info.alpha(block[i+2], "PLAC")
- if (i + 3 <= n_lines) record$death_caus <- extract_info.alpha(block[i+3], "CAUS")
- if (i + 4 <= n_lines) record$death_lat <- extract_info.alpha(block[i+4], "LATI")
- if (i + 5 <= n_lines) record$death_long <- extract_info.alpha(block[i+5], "LONG")
+ if (i + 1 <= n_lines) record$death_date <- extract_info(block[i+1], "DATE")
+ if (i + 2 <= n_lines) record$death_place <- extract_info(block[i+2], "PLAC")
+ if (i + 3 <= n_lines) record$death_caus <- extract_info(block[i+3], "CAUS")
+ if (i + 4 <= n_lines) record$death_lat <- extract_info(block[i+4], "LATI")
+ if (i + 5 <= n_lines) record$death_long <- extract_info(block[i+5], "LONG")
}
return(record)
}
@@ -317,10 +317,10 @@ processEventLine.alpha <- function(event, block, i, record, pattern_rows) {
#' - \code{mode}: either "replace" or "append",
#' - \code{extractor}: (optional) a custom extraction function.
#' @return A list with the updated record (\code{record}) and a logical flag (\code{matched}).
-applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) {
+applyTagMappings <- function(line, record, pattern_rows, tag_mappings) {
for (mapping in tag_mappings) {
extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor
- result <- process_tag.alpha(mapping$tag, mapping$field, pattern_rows, line, record,
+ result <- process_tag(mapping$tag, mapping$field, pattern_rows, line, record,
extractor = extractor, mode = mapping$mode)
record <- result$vars
if (result$matched) {
@@ -338,7 +338,7 @@ applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) {
#' @param type A character string representing the type of information to extract.
#' @return A character string with the extracted information.
#' @keywords internal
-extract_info.alpha <- function(line, type) {
+extract_info <- function(line, type) {
stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+")))
}
@@ -349,7 +349,7 @@ extract_info.alpha <- function(line, type) {
#'
#' @param file A data frame with a column \code{X1} containing GEDCOM lines.
#' @return A list with counts of specific GEDCOM tag occurrences.
-countPatternRows.alpha <- function(file) {
+countPatternRows <- function(file) {
pattern_counts <- sapply(
c(
"@ INDI", " NAME", " GIVN", " NPFX", " NICK", " SURN", " NSFX", " _MARNM",
@@ -407,7 +407,7 @@ countPatternRows.alpha <- function(file) {
#' @param vars The current list of variables to update.
#' @return A list with updated `vars` and a `matched` flag.
#' @keywords internal
-process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars,
+process_tag <- function(tag, field_name, pattern_rows, line, vars,
extractor = NULL, mode = "replace") {
count_name <- paste0("num_", tolower(tag), "_rows")
matched <- FALSE
@@ -415,7 +415,7 @@ process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars,
pattern_rows[[count_name]] > 0 &&
grepl(paste0(" ", tag), line)) {
value <- if (is.null(extractor)) {
- extract_info.alpha(line, tag)
+ extract_info(line, tag)
} else {
extractor(line)
}
@@ -441,7 +441,7 @@ process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars,
#' @param skinny Logical indicating whether to slim down the data frame.
#' @param verbose Logical indicating whether to print progress messages.
#' @return The post-processed data frame.
-postProcessGedcom.alpha <- function(df_temp,
+postProcessGedcom <- function(df_temp,
remove_empty_cols = TRUE,
combine_cols = TRUE,
add_parents = TRUE,
@@ -449,10 +449,10 @@ postProcessGedcom.alpha <- function(df_temp,
verbose = FALSE) {
if (add_parents) {
if (verbose) message("Processing parents")
- df_temp <- processParents.alpha(df_temp, datasource = "gedcom")
+ df_temp <- processParents(df_temp, datasource = "gedcom")
}
if (combine_cols) {
- df_temp <- collapseNames.alpha(verbose = verbose, df_temp = df_temp)
+ df_temp <- collapseNames(verbose = verbose, df_temp = df_temp)
}
if (remove_empty_cols) {
if (verbose) message("Removing empty columns")
@@ -474,7 +474,7 @@ postProcessGedcom.alpha <- function(df_temp,
#' @param df_temp A data frame produced by \code{readGedcom()}.
#' @param datasource Character string indicating the data source ("gedcom" or "wiki").
#' @return The updated data frame with parent IDs added.
-processParents.alpha <- function(df_temp, datasource) {
+processParents <- function(df_temp, datasource) {
if (datasource == "gedcom") {
required_cols <- c("FAMC", "sex", "FAMS")
} else if (datasource == "wiki") {
@@ -487,11 +487,11 @@ processParents.alpha <- function(df_temp, datasource) {
warning("Missing necessary columns: ", paste(missing_cols, collapse = ", "))
return(df_temp)
}
- family_to_parents <- mapFAMS2parents.alpha(df_temp)
+ family_to_parents <- mapFAMS2parents(df_temp)
if (is.null(family_to_parents) || length(family_to_parents) == 0) {
return(df_temp)
}
- df_temp <- mapFAMC2parents.alpha(df_temp, family_to_parents)
+ df_temp <- mapFAMC2parents(df_temp, family_to_parents)
return(df_temp)
}
@@ -502,7 +502,7 @@ processParents.alpha <- function(df_temp, datasource) {
#'
#' @param df_temp A data frame produced by \code{readGedcom()}.
#' @return A list mapping family IDs to parent information.
-mapFAMS2parents.alpha <- function(df_temp) {
+mapFAMS2parents <- function(df_temp) {
if (!all(c("FAMS", "sex") %in% colnames(df_temp))) {
warning("The data frame does not contain the necessary columns (FAMS, sex)")
return(NULL)
@@ -541,7 +541,7 @@ mapFAMS2parents.alpha <- function(df_temp) {
#' @param family_to_parents A list mapping family IDs to parent IDs.
#' @return A data frame with added momID and dad_ID columns.
#' @keywords internal
-mapFAMC2parents.alpha <- function(df_temp, family_to_parents) {
+mapFAMC2parents <- function(df_temp, family_to_parents) {
df_temp$momID <- NA_character_
df_temp$dadID <- NA_character_
for (i in 1:nrow(df_temp)) {
@@ -569,17 +569,17 @@ mapFAMC2parents.alpha <- function(df_temp, family_to_parents) {
#' @inheritParams readGedcom
#' @param df_temp A data frame containing the columns to be combined.
#' @return A data frame with the combined columns.
-collapseNames.alpha <- function(verbose, df_temp) {
+collapseNames <- function(verbose, df_temp) {
if (verbose) message("Combining Duplicate Columns")
if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) {
- result <- combine_columns.alpha(df_temp$name_given, df_temp$name_given_pieces)
+ result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces)
df_temp$name_given <- result$combined
if (!result$retain_col2) df_temp$name_given_pieces <- NULL
}
if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) {
- result <- combine_columns.alpha(df_temp$name_surn, df_temp$name_surn_pieces)
+ result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces)
df_temp$name_surn <- result$combined
if (!result$retain_col2) df_temp$name_surn_pieces <- NULL
}
@@ -594,7 +594,7 @@ collapseNames.alpha <- function(verbose, df_temp) {
#' @return A list with the combined column and a flag indicating if the second column should be retained.
#' @keywords internal
# Helper function to check for conflicts and merge columns
-combine_columns.alpha <- function(col1, col2) {
+combine_columns <- function(col1, col2) {
col1_lower <- stringr::str_to_lower(col1)
col2_lower <- stringr::str_to_lower(col2)
conflicts <- !is.na(col1_lower) & !is.na(col2_lower) & col1_lower != col2_lower
@@ -608,9 +608,9 @@ combine_columns.alpha <- function(col1, col2) {
}
# --- Exported Aliases ---
-#' @rdname readGedcom.alpha
+#' @rdname readGedcom
#' @export
-readGed.alpha <- readGedcom.alpha
-#' @rdname readGedcom.alpha
+readGed <- readGedcom
+#' @rdname readGedcom
#' @export
-readgedcom.alpha <- readGedcom.alpha
+readgedcom <- readGedcom
diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R
index b8697908..e7e04a82 100644
--- a/R/readGedcomlegacy.R
+++ b/R/readGedcomlegacy.R
@@ -47,7 +47,7 @@
#' - `attribute_title`: Title of the individual
#' - `FAMC`: ID(s) of the family where the individual is a child
#' - `FAMS`: ID(s) of the family where the individual is a spouse
-#' @internal
+#' @keywords internal
readGedcom.legacy <- function(file_path,
verbose = FALSE,
add_parents = TRUE,
diff --git a/data-raw/benchged.R b/data-raw/benchged.R
new file mode 100644
index 00000000..c0d0eef9
--- /dev/null
+++ b/data-raw/benchged.R
@@ -0,0 +1,31 @@
+library(microbenchmark)
+library(Matrix)
+# library(BGmisc)
+# data("hazard")
+library(tidyverse)
+
+
+
+# Run benchmarking for "loop" and "indexed" methods in ped2com()
+benchmark_results <- microbenchmark(
+ reg = {
+ readGedcom("data-raw/royal92.ged")
+ },
+ alpha = {
+ readGedcom.alpha("data-raw/royal92.ged")
+ },
+ times = 5 # Run each method 100 times
+)
+
+summary(benchmark_results)
+
+lm(benchmark_results$time ~ benchmark_results$expr) %>%
+ summary()
+# Print benchmark results
+print(benchmark_results)
+
+# Optional: Save results to CSV for later analysis
+write.csv(summary(benchmark_results),
+ "benchmark_results.csv",
+ row.names = FALSE
+)
diff --git a/data/royal92.rda b/data/royal92.rda
index 4a10b174c582e692825f6afe8e14ff1b069d9978..678261bd01b418c34dea9b0a8f61ed568ac576f1 100644
GIT binary patch
literal 67612
zcmV(tKvQ&2UKVgRpfkl*
zs)A-(1?mtq`wK1=P|2Vhnvh!g`SsYX4|fbsicZp6kJg_Fs9KxM*|3n!St4?ABQ
zGOXX2>_XHrw%M=CH`Ek{MrR(DqDw%&()~Q3JW3eYe?D=Xr2$bJxqRaiBfPdsiJf%>0sk++1HQ@nuK%m-~Bwj%ohQqqA9!X8kacjQf
z&RWSBfEO57e;WR6jpP96L*C&LMF3ME#u%vAz2#(l<(333V7N-)lH`y?t(J%9@yo4z
z4NQFcs)K+pPp)3Q7t(y+p6a)Ht8DiMN@B!THRwQyD9vK}KnE4avDkZWabxU#8<`uI
zgm$$x1Fzgq2@nu4ueSR6j$y%QUB&mQp{NRS*iRP!1ex@$m+vFHTRb$oH1N~~_(W9B1e~St5sZtJ
zGSFM`2f0Mu4{wuJIr@qZ-EZDxfYmML!lJoTXw3`>ISJMma2n0~=`p#8mM)xUPby(-
zao1Y%N44IvPZZL(A#7Th$!>%iVsD$=hWjT(8RM?f?*aouu;5oJX{5W24-ky+tLQx-
z9ZPKNUjRx+xLAUrbI4NYCWl$mMji6dkGI8RCp@8|^FmZJg3egn|D-O62BMXW>+v$C
zk1xnnnLwqX7SpQ_jQ|>12npb9?-YFfI
zRK17DV%gE!{S4m8Gh&tpJDHj{fV@$`uycOSX;{9Y>x%uL2}^n^%82`4X-JvNrHaB!
zSAa`+MqUi#mxpCyC5|wrpLR{)cp(c_2d{bayeKh}O*;j4{&CeRpKuE}C~rmA1e%~E
z5(6uJwQ>AiOyhY7>VHYNLi!N{j#v^Qe5->QV39+~FbSdzdV+GT#-ljR-35yw->FbA
zg<+b-{9<26Ybh{EbU0_0DEluFMhR7z9&%-=SFc?t`4c41Nf0+7mVaLjfX^_$Ic3S2
z8hGtilTF3*bf-r{v~N^L)j0zub74)*K=B0gk`QssSijzb<
zo*?lGIHj?Sm0{kqW&VzB&Rf6Z7Y;{9dy06pCh=MtII>PXtk-evj1?y}Gg#cEe~6o#
z!LLMG{IU1*OIzOHUelEZ3wWKTPe&?>$Q~%Ba{ca0AI5ej0c}1R*7{ZdCivy@th%aP55Yez`qWfB;=Sn@0^dMB
z>^((wik=eQRxXu(&LA_6gwcOm>29Qks>U*oEK^`)@Z6HlORJ8Mfq~Za_-ub<5h_Uq
z`k$HHUw2UCPSwxKb>Os|x$c_98OOBjDHxS&qM|14wUDaU3Y9_Z_^ijF3S!B>?ZqO%j6w
z!ow8&Jf*K`wKq@0K#9Ntz90N>_&voBd#-ROes2Hjf2sFf7{=AGhy{MCkDQ
zUZ;Jr%AV-ZB>*lsXzigabF!t;p+#!@^5*IQ%IF}=k2#uwbswxzGXNo!q_`Lp==;{)
z2Py%!Hsov#xsd{?7Xruq{wY0m-_IaGpQxszfEHiboO
z;146%Fo|)lt%fzuk=J+*_D34*@)ea}_rPF<`+EYW!ZsJL!vX7GSC6dMH}}?;&3_EE
zI3K0G*n5nt&tCs-f-uVP7=E&=aq6S54WG2@w$!9jp+7-)gWIq>5^F>pmmilX-P!{#8zzj`#E!2!tAIbVN;w9V+)
zhMGfIuO(%?2#>u8l~>SPC>vwGz$*Rq_VVNTF~sroo6w|!)xJm~9(YEixJ-QF^$7w#
z%8FjS?hvy4o%c*-H0y8jLu{+5U+Y&(GlBkbecR5wJ`qWt7e{*ez~T%ae{>wf7&x|Z
zPSQa}w?`2_?)wBs%B~4?&{{}%4-Jcm!nCem>B-==#Y5u@=XCKg)$JGk@WjvV)?(Gf
z49yoDMI%Ey+^wU05qaIiJ6*oG2EkUOK0xFEH)SBhh3EwWTys-V3Nnr&Y(V%CRk2PO
zVl%jmP%#eMlyPf9ojE+SAYq=fMB@l1**bkExUp&Tq_n%xQ>O!ajTtT^h(bsoOc@5!g;oDFe@LeIz@V9N?xT$BN(!B4Fr
zJg%1;4+;zOPYiT
zPi|q|r1nr<5Dw=1&y@(dJ)BrznkuX|3LxrkO%?w2G$e8|S9Rt86}{>xMg9wrse3YuD;a0jryH9i-i=@-v4S
z8P__;t-62_*oDMhIHVIi!hxMRH_&tToXiIAFz+U7e7MWDTpN
z;<(Jn*#pS=7S+f=BW_Pgc*RhGUYU)6i;G`Gs`NShkREM?l8Egq@0CXbD$3Eh82`;&
z!(o#5^2B8x`A4(Ar5zd&bM2_f<&py_0YMW@WNIr4%z%L*Q+8@x)xOW^F4`!GGPbX}
zhXHDEPL@=wWVKYG_mq*!&J+TPXG;1VD921)Xugb}q$qQ%u-cN|=HBRz*-fCKMJ*R=O%n&OY;WGb1wI99W&rvFLT{4c3dgipPxTbVw
zC&fV0UpOyD@AQIx4gcix+I5WWoZ>4B3!ePgzVx%Q%5K@LxdoM;WB8t~k9cobzwTY|
zP}e=t2-BKZFGdJ8kZs=a(T>|v&O}NFEXvG<{0>Q6AI~Q_VKhJg-uugG3&_!f>Dx#{%0I5qrQlqR7{wo*J$sfO=*ywylsT23_eX}9hxY);9!##Wqw_yl~sqgv#hUV#zaigQK
ztE^oMpL&R2e(x|ZMDVf+jxjV*2=|)+H1OU9bFJ*gBBulIKX^3$g1eJBMQnI>HQVNg
z0{oA*Nb!|3Ne>`vN$H-S`s!TX@8gq#8SomxvkvA*CBORQ=)j7u{V|8ICU(tdv4E@!
z+gD?a`#tcpd=35zl!1gvFX^7Q_~0*)U8a?>=5&nE`liY%J6mOp&jjN&a9IU8qvU#$
zZlCLpc$^vMfGeW)%7;L4TCqtQDrh>5onrlwp)?c!+OvCFHOL*35FVg9*kF1FtY{{x
zVw0;`MCjNo!G$hUK3Wk>F-w)Ng@cJov~4oMOgg?yXIb)<-*4-a&K%4%HEQ!7QaE^-H$Cy0%U?v3php7zG7IC8U
z)Ha0bB%>|!1Nw0c>q!e5g5L*6!7V#^Fckx3uIN&fLM1;^yj-?Ni)PzXA5B2Rp}adq
z&|#mcS@ol=5Q{W5ANB_0I${QOdOgx~MVG%*AQ6E12)Gn32or#nVtzwgFfsnh6}<2F
zSU{JuooRT&oa-q`e2_-)+j|oK0@;_Wo8};+1F;)MpDDSUCM*T@F0qrE8*RC->U4h%
zari(o+-Ob8(Hl$GS*Fv64-c`TNWn!m5$3_$z=fK&dz<@>$EQ?Tk}D~dts9+GycdBF
z67~b+*0}k7z2R(kStQc0OrZLTy;ResStq5w@kzgRAxAr(HZ?+J)!6AcT2`KHmP{Oo
zbfGXrq^%nBohqv(w0@Q$*Uye&6UqmKiPcYYm+XaUy~zEoQUvcC#&}%FhFRw=DUI}H
z49)zLV*d8}@=ZZYsU(nq;v
zm71Q5_TO@l0qwVZc;LgfsbJ`L9!TT+$VtVHx4mkiCo#IMImYlA7BREorP%h){S|T@
zgsR%$Dh6W#SLXCUXE9k%KN#Z`!gXAvuFY6sE
z9BH!b3eonY4FmIfMOckmE=|9%bdA@)7!DQKEl%t5RH@9z)V*2ZO+#N?TL7hT&{6fA
zbeg;k;-*Xi4PU%b6SJT^A}mHa%DJog6~73U-_8KvfkL@z7_#ie-YQnhw}5yytyV0c
z+kjk{@-SZk{cPBG)mGM{VX4!ht7{ny;$Jgh7LNCkF6gq+ibi0CfKL`Y&nS8pad5)w
zuXil0{YW#}{RNbS9OQ`vE%j{0SF5Xppv`{E0sCt*jA?xV!7w_)yrNVn$~x>P<$c=E
zJwNJDC!-{nGaH9R#T*zX<&fRn7)eeSlitdZV*5=BF^zUx6i09Ne5cbHC+9A!*t%VZ
zH1F-r!7#pD_w>skFyrD4`CLT*o!?^B9ZCx-tF8date7W6EiSOc%*yX$FN-(ObJVFD
zYYo5~8BNL8=(ngFALTrU&6t>&yCLU5hTaTv@@iNrx_|g7ggQ3E=#EkUz)Q?`9`u7&geL&PgWFdqCEWI-SLo&dzd+L>fApN2CJXwd~*6WYi)qaVRU
z4ZyvFv6VEs%OYE%It$ac+Y?YSiH{M)s=)^?J7L{@*g0Wmx$3EBV`O!}f+=t_w@R^_
z>bBMW+~9n8?~fkst{OCq=nl3hd&V!{7QYa7whD*_CEuM3VS#euzL~=9#6rFH6sy3q|F5THHl8Bgu)Ks4@5uza+4Pa-5wR
z&A>Pi<4`tmP8f0~bCNE4s$t3|8u-X8XNNf-NHw1MD%lKShw%xkL!FXJ!@ZAy@H*>3
zHi%J1R=Xc}maC-EEflISU}x!^f^IXmcwwHkuO%J8RwQ#K&(V`(AvljIR-o^gU&?tu
zZBvumb3xCaYT->#J34{5kylLCy(Q1;PQz*TUzR$*HE}GoKO|tn>Xjb!6R1yCt1r})
zs|z;r|7Bw0^~Udka2E>d9zdBYr!qCf^$_OF3SfvE$*hnaG_98vo*_i)@0nHENgn5-
zPCjmnXK`|2bxDt>HT5Bb#BkzhN{GBOa`6mJJKBe>h(ajVJrkG11`Eva1k)%Nvbn-<
zG)hD1!H=VdcM1MrCrptXu>=4-DeC>~MVBz7WnZW4vEfAUqn3hcZ+qh_8GSj)@o;d>
z42HsyOh{W5P7k(PLAXetK`RX1*psnw`TN%|mMDSuX@a?!gz
zf3dgMb^=AL`TR)4pScd4{V21@Q?)IVl*-eF3vO}~Zbiz{zBGJx3$f4i+HbWI`*E{b
zRY>G7ROWxeyqjS8on<}KR#iz|2alKRPhkscRI<){2fotsd|9{SQ(51pyP$INNJXX8
z<8iC^HF-&gF+Y%R85#Rlp4#icpYzZ(Bhux*6>C>O(_r8pEQ26fPg3t|HOfD}MwWf_
zSE&Oyc)NlSA1Q^~%IQcdF=$HBX^teo-@$Db93avsFmR9^S|F6%GxE#Y3^c4^Jy|YNQFkDF0
zDY->^;587m=x#3ywJ-N!b(eSO=qo4@=OewT`Y04QwNC(AUg^KA`=5_y-D5FhHOnm3
zVBnoJq{zf%>A6v^#-dOoBjcY@eo|Yg;AD-68xLordB<1A8#0R);X3(ndq6y6$I7;-8TyS!%c&kpZ<6@7Ijy%^25;uT=S`+&G;^lNy@T)C;%MjI7
zr@yUfUZx$Sr_Z=wg76>CO%vaf&o;9M_h23biYifqx8}CziY|$xR>NA?u5bhg8n|Nk
zL&b28T1-wD7z
zML5{xiz8-;44YZnX^jQ{kB(8))&ou!K*4DqW|0Dwd-gT=EnRtVZ|w%9%j3xd4`ML+
zJiJM@rWY?Op&uC9W@99%=p*FT5yD{xNsH^C|IAy?OK(%v$|sa?ure!M1u1JZ)hIyE
z8i~|$i!KSf^FMoz5kD<#3kxd^e<`~XF~J~5-;(kfazYv);6_=UEKXdNV1Y7}L>Glb
z`0w7#)$Op6-UaJvT~M+M8{0xevNB*3x2yhhTV_70h5tu~2>1sg840^v=J#aK7gz4E
zNxl`5?#CkRe@~`c6G8)WpKdVNIxbcYwB|p>2B1yX#be^pp*zyzmQ2DY^{8xKEorbn
z>N~Q@q4oQ4)WFgQ6$T;0t3+NV2Yd?7WECMWT#4V~Em8?oTp3A7409f6$<
zQt%6yWT^`B+PU$W5&A7{Pu(|!VshtA~7m|6Vs&D*bhG|X5N5k*YFNpK3WihZ%i;JazOz@-e=JXu-i+}
zL>`;084GJC*L6aOn0-3|e&FVXmu_Z2(o=H5Ni%1AR^Xy<>>zo^rs#f|QBlKW{sDn!
zIL;7T=Sp)cQ)xzSy?h)08f++#CR827tmg1kd~2llOMS_>hWpb{Kl5d{GMFt0wUTqQ
z^Jy!tmUtj-oq-d^zm+vEwut#<^%YdLsf%79P~AJQX*%;HA7*GH%zmErV{aO69rj&d
zE||$WXf~R=_lT)9<+19b9|JilrqI%aQ2$qwm2`ZKK`r+zZ;lHTXVOD
zx{||aJPF@7J~t}#V%=lEqpDDWf(L-|bc4P1srUsa6H$8;-#hUGLD?wsI#a7n)~3kv
zjPXyrkCQL`!}Fe>k45gkc&gX?Xdj(}=go;Bm~N#mtGX+Jy7&ISpx*>QuT%C0i}zpZ
z!mzK~WN1NlE-LHvlK))J?_=rG=2keLY}+SGHt!YB_qr>jj~#?kgXGNmz8s>38j<7C
zl(hh!vB)B*{787P1B#E%;U_t|D5x=L!ha6M(UIFy0+-sXz4tCoY!fB|HA{RF{Ao*v
z|HXLjbjut&v_}&!W&Yh9v)+g-7^UJCnjqYtW>w+usUC{00Qma(g#rjqR}XpKHcPaU8AHTa{HlK4
zdAYucqiB2*U%ia59jdE$ByC*6uqv~W6vS|8252)}=hG_3xhV4beI;W=>TS={^ieu>o^(xZxt<4I(z_BcIm%vZcSa
zfC9!by@|+~s>7NUa@`qu@gJPyK%M*_J;^JhVVL5Mkb&R}qE0}sPj-ZXZW3Xpf#atr
z{$UmtG}f$DpOn>WR#AXGcBZdb2YaHp6Nfg~M!-d3(2;x>p&`EPZEn6Cxw*sMi~35e
zIY-=gdtIQ-ZcI&*EY2`y1qd?yhO6Db&aquH>Yp|zFA0rnLw&YzH6Z~0@ni=NcI8(w
z!0Fz5G>=^|Vq%wpuMjH83>M=JO6;U57ZI@JMv@WL*C$*E#WS@Zfg=EvW}a6)h=+%o
zrtbb2ACm&?G%-=hnfi>vN7v<&WIpbXDb0KpSaQFYv198=0LA~tsQyFDCqNv_`UFQN
zF7RXz^U4k$_UcIVhx;?SBG<@IfbWXCUX{k@$ze#<YpD%N^4Ea@P&=$YY
zMhohC>dm;awlB2b8qoXpSEVHG4d8yj9C?xR^QTN_nct*R`NZY=6JW|jBGlZCGFv2Au
zVI_34?PL?-!hG?)=OwHV)DwT~1p}g40rkk{+`6P+DV8ZzDo^JM8`gP=WjZLPf{Ugt
z7jim88eIR5`~Ji`?6PV<70n2loLGbGo0g5jgP0>ydo}YhfuMc)2
z?PRfZ%uMF21&`Y)F%ykJb{ad-hNu$<5_qpsA9Me*8?PsUe)HO5w)o+?t!+_@jVtzI
zNjjqiBjz01(-}SYXjLRGvn`&RgnYxH+%&J!{U__zu?*x7gBp*gZ#0!pVD&9&^x$4u
z%und;@=6v@T~XEBVQH%hsR4$#e)%HmzrJee
z3st7PNy%Rc{YkOOGcA3^wkL;u!AY-atCwNANBOg1p^Y=iLZbQAeWfc22R)c0=@?F@
zTklgJ)E~MPmg>a`DWL|8R3!%nGOcNoBJN6JIL{IUeIPc1-}#W|BQwfvPReLZF>|n=
zv4E^RmB_mSSvux2@x)-4pN4LfSE97bpU14=fn$4pfWA24;b6z}ndqNoXOPbAaj6`U
zybH1FI3)g~kH^2I(dXme>?)(ke8VFCf;NK27M#XW9j3(oDW#u-3BJ4q_Qmm!4OTVEWjW
zdS)&8>hwjQwRM@EWI&w4fRwLCah};d9ryXP9n~nh7
zu@YvvqWsz4OnS?}%1gBMbNo;eAv>gL?rGl=exQoH=XtUY4qt->DxF|UVeUXt`zYQC
z^HtnBvPYePt_T&8n<%N3#^A*#0I~tVkfR_d8^0_exlNkS9SA0hngl-M53*r{h!i|i
z?BV!&+>^x!?mTUiiT|F->Jt&!N!5nZT^9nd`?Z8YArwI`C^70|sQQDbtiPjlwyy&1
z(|~qThBndyXKZSN-q01E#&Wyg*aG(P^~M{>68}*)5m|Mkau!G7MKwBl=t?Xg;l08^
zGluY}2U*7y#Ewr*SW9@t{~w`LlKK+z_8!Q#RS*-sStFZ^DI76Dh=8Iy$Lgx8&=ukg
zq*VZ9krIXi=#i%dQ-bFp?&ODBIQE7Rg*SL8p6X-*`ot007f}?d!mStx)zEKWPX~lXj@d#LIJ|60!uMn>;4a
zHM4g|z9IT1`Q$1GF%K@2q%J!Z$@!=h@dqrXdfn*hzMm+~ujisIrJC-V;bmN7yRrCo
z|Ng&-IU0E<84v^;Gs36m+F=!&;${E{lZCc(iR)1DEy{_p)lIx#t1p0k(su+U14Ep1
zO<(tK-xWe4TO7MTtOYa30&w_86ug@|i)|3$9-iVa~ct{O@qzf2H;
z@YxmRl~wIXuy5deg3TI(Tq0MV*n(j+99*a@;5Fq!kO(U7#qW|RXVtXfdxuIPcVoQcw5YDSZW7g(Q^70L~Iw5RHa$qXmXP{?{a$S
zC=cj;*gDR@B&rpn0SfGv0;~#qvLTj9)U00>FoHeMKtDBRBy0|GgfZ5Es9EVp&i*7M
zRGf38DT2^h%07x$yOk#Y0F0X(uQ7#v}{+V@^M}$;cSRw~E#+!p6-a*kL>F6(FSjtD25IU6Fn}L!5M5
zieB`luAxs7p6f%}e6F%heDV@beB`_-CKB_Qy6kfuQb7*DeBt1B4>)IChGpk)x4p?q
zrbpZvS+79A62Ep3>JzKqrfy;;EuM#A^_W-k*PuoS4kjIxj0YxzHWMY3h+`*2%Asf>e0RLv$>o2k1th*j<40%e>h8yr9htn3Y#{`l^A^JXP
z`}%AG2BS^&nd@xe0MG0l&Hv14oM)Lwh0hE$8yq>DQ^%%K2HSu=@xMWa*4b>r1Gxrd
ziyZ~K0O3#FOv2K
z(&KZ-IY2+QVTuCjFyGeBu*TZ72m;wet-j<^v(6`6<`gBgLaBCe=E@bM^#wcn^joJB
zD`DZqueJ#^Y`UW`0sM*F=oKJ1=5>WSQ$OJU=QvbF$VUz~ZS9!#t!QRwHbbK;aI&!3
z%24DsTkF)$NJiY3h^YF*8%RJcaOK!`)YW--w!L8vhKGSn{@6$bA}n%-xam)&bB2jKmG$cW47XCu
zJ@ym>F8x0HC*WM92(uZ$pnBjS*?~H7dNNLGJhl))pAqhdMloikDzLlO#X=hotQS{j
zFnxNVBzzxS5m(L=FBQIvAy-P9{y{(#N_STWeG#&E_5hqa1p)Wv$<#^k$bR~%&a{AT
zAJ->Y(Z2{jPi=8MfwS!=n|rsG_MK!of1O4{IJu$lizQgCvN=xQKn7&MVx=Jm=Eq8x
z3Pq-Bp$u0K+2sh-sVDETOXLx4ROiT*&dfB{`a5_2gn9GF=MxYKIU$y{T!LUL4AQaG
zoyyjk=n51v7hPm2MrSxyOhJ2xw+t#|Ft8bj>?EM-7dRdHs)H!2iD?*4HHNdE!tkbi
z^!Uc5O-+07-;sDMa$gi5^iUQi>IfGMzEoH2RW
z$NkmXFqZBcUarYYOe^M@VJiJ3H-IWrHwds+u(%zud8JHB4hBPLY!yTR6^_|6=p?&~
z`G(e*1Lv`JEruLJVxt^u6o}fqMbupLew;}K#5&e}_4bDDmA*+)kMK)07*#fS`?iy^
zVRq#DOTl5=hoX|!8!mKPK|3IS0UHyXkL^AyuE*kfh7)wc4iv0aWAa&5fQn(6b_=JL
z#dYxUxM8H;AiwZ-DmHUKU&CP*x&>VO1eD7C63yieR{V1jNv4JDD_h6*ZT?u=p!47Q
zja-jja(yv=wI&<+f8C{ON71`-3_x=x_7t}E^|)rX3JNVrj&pli-{}3$XQSl(a9sFc
z+Ow}*FgjFp=?CZ5`_A*+y@7JOp2@sk7nLR<-^jkT)m#9W&zzv%varm@04$0IIdxg59
zId^9Sf?iue73($_1^k7{4F-CEVT!T^Ozx`gr`W6xQv=rFbe@lZ)h{ZkueSnb#lE$=FxYO
z+g)mYuhXY(li8(8@T*B$Gz`eIlg1+2DIvv1f!%ph%UXZNlf-@YXmuV0-}%|;!WjfB
zdz%+vdfiMg?))hU?}SS*yx9nBEw-H5B|Aokxbx;N2iSYn!O6Li&94kR`GoeGG}AeJ
zqofka<|x7d%|kjmxlp{If3(QPvQjpOS3NizXZ;4P^U9)CaACPQ_jYLFTpa{fs
zk>wFt#%C-b=c9>CCb0HOiBE}4O3nmZMZvT{H;YezV}!BLuH0aap(tYl3ns#~BQj;Z
zZwrOL>U42~A|oQ55H<}?rQ0g4wliBV&IN>$ZU%N2hQnj>LT`9N$EDCnwR&gY!HgzB
zKxAW&*Cxjvt3HumTLci9H8N+YaOaAL%BDIa0PNvW1Tbil1)?=#_7fgP77*tHmwr^EzsbWLwjUrg
z1x=Uvs{UWGd^;`!WOimY30^9gfsNL5EiLlv|HUKpMNQ_x4CE$fSgvbb
z-XI8CJ$KAjcSPUJr_1Ro6Fi;kX6ygp3(G4B(8&=sDe&3NUm({mso7A-*xEwy-Vb{3
zIk1U{Emozf|G9rgDl68lJ4N?TQ#YTZbjKx12&clCIQ*0d3oFB^?9YJXY985t=~r6&
z%^Gp-iCWLXN1*i54S`+CQTl#spS3HI;U*BE1PT(QT&DyQzs=tqjrI2D?&Z1X<-#YC
zmOi{SL{$_r18IhDc(|~9pgh0g?Nvo2V-Tr%i{}H$sV{ZYXylQMc3
z^cH2eo6T_gK6C6Il}j~+J=MR*9M>CV99LofXi$bwLHE!yfq4$h1sMbF7h`vX^4kdbo$<*
zlrTC2rxg_SAR%_~(~d<{6i@*kMR&3J$k{S^9(%(IV1jdbjx*7VqaI*-8tkvqBm*4I
zI-l|HWCnHHcULk&dmRUeO*{T1(z5MKGz>a|FvV9AM9c`}nDPg5*9?sreTqwPy=)kL
z$bFY*B!uId@b9nP_4C4;7Z7;euo$YezP?L0kjBBwizCDRB3DdRL`ZcB+(kmh*)}TDP}`Et|a<2qE7vF
zAwfdoXv?x#+7{G7L*YIV%+x=9VIqa16W|d*9Npo#CUNLNoze1;l8Gme#lpKb
zEf}BGp54BjMA4JLx%{B1^1oLUB}8UaCN=nHr7x{c1hco0#e@<^BY$FaW&km%OlG$T
z3+-WfF1{v*B~Ufhe3X9}8)>0-JY}U1-NBeM!;j2&dGR+_w1k2$y+MvNu;%8ryj4tb
zztQH3*yXap5wifovUc%Q;JUZ6NK}quhj<=S!mGsVX!MKLjX2)DPo?
zT3xV&r%&iqwo5fENljqF6o!Enwb|Uk^!(|MI6q>Eieh)-YE@=A>|Ldd
zI22cwUAUK>NYc#n^pdXW9A=@#2x@*BJXGX;G;
z@s!PHbqzIiT07Ck>b2mBoH6MWmtq7NAKG2Hn`^fg1t{N)p)Ax&4<&xO0SzoFA_zJnKv0q1-+n5W-NXIbn;kt~rB
zkbHjQDhKeNQlqP=>=foD7#cw2TFZ2&cT07LuT_(p6?CTBbeDsfb2vnJf}og1r(a&C
zM^$q=15?-@mZd748<01;`HK~V4UpZK`gK2{Uw&Wtzt;IfEWLxOc`#|VL5M~U7nTW@
zOXC@Ob>?C+b(c~@BSIheAxfwXm)n|c{A8fPm0E3}DpOqeBakAD@+-;Tx*7v=ElGTW
z+=K?sM!S_DPM>e4K@N7HN+$9Nu5seJ@UiC;tnF#$L?y?PF9xvzo3Oh3$$mjH%Bb&e
zFcONfCVQ|B8##0p(yUMZn(%G9ca>fBKmI(7{6t~ok)jvt!RV^9j3FeWB~9kzY`;=Q
z?%0Nz=W|o-7YQ68*(Nybt)&1$!n{ww3kQv!snI>XPl|bp&aT9k70Pj6>%#=OXCiC)
z&CDiYctOwT=z4_UoKTICSN&g|0*PU1;xjzi?Iix
zwH4EnkYSZ&=a5k`O$G)9cFg)pBpy*AKDK>Zs{lA7oDN0oEby
zg+)?UBxX`yP3|BjzqINb{1S2n{^Tb&=i3Pb2G%FBcsu-Hdo*F1TA1_hKPtw%25d124t;1yE#S1@aQeK1?P_}r&0Qlsc6qMTOgL^N?cxj-#j{$6ooJX>(7
zVIet7WJp5-#^$pS3HKiuv)6N90-S~Xxyy<}I@JlrdW&N%ED(h8aXO@f>;qDnztGPq
zzsd8Nt}>&K=^opmyc6Pq7nDeD_9A1-QhSSq?X6~ICV+;}U(r?_D+vuXxL3TmjojDD
zf|AW(aUv3Z7|a;6s^1nHKdK(#f^mxCxw%zaQrVSmB`~#imaPB3J}}us*NwvkW*-l|
z1C5U(_80<9<$DBn$(16rsw4vef?6>GwGr&N1<721^xz9-n9Ukgcm4u@5bY-X=hU3gOhK+!DJqu?9L%z_bh}d
zyu`UYl{ctC7xOrF#&79Oj2Al%Z?+X(r!MG+iAg;s2xATQFb+nD;N~b50d&GIUhs5I
zd)sv?aWCkJ-*0K^0L5B1WoaL+PveaX6HcF+)@b+uJVd3+%5;^`M~G_8!=C?s`?<3e
zC_`pk&RiqGRoA@ltw#xwm6+8r1qgWQ*gd*7d%;yz^C|RNFuSU|W2>~pV+`--#Bq_)
z0AIAO_MlE0GwEO~0rgH)#=`@vexNuRYcR^@w<}KaqKb9`P2B=jPr1tb+fQx=xjZLFooV|6|Vm7
zjKc9EAe31tZ)meYKujw*+68&s(@aMYgN^Cp8|5G@v
z(V1FCDs)m6mQi~TRJGvDmwCHF5sn9n^a&ZfG%T?GrT=vB?g8tSQ`EN27P>aa(`cB-
z-7jPJf$cgna-kEjJew^$=T)dxK4tLNCYwG19=q^Uw?7vDdu(AD(|sx!CzxLMwPV!=
z@M#-}$~Cnc)3z7=e-7QBaE|o?$N0J0RHq3K#P5270o5zNBh%@WWr1sM!5z@S+n+a+
zqUjmXl+*#5>yPRW@BDsCq8S%T3&XeY%x%#Vyp88v;u*zt93>bN=8@PywAtqEk7@lF
z0jF|~$<7CPQA<%qFSRL!=mXEe4&JB~2vNY3#k=L-vzaZLY}%ZMfzr4yc6Ig;8`9q$
zwDP}p`a0D*2u!cN#wGV*hx0tL8%>)Tik}U@KjoL*_Bb(|=iuu4*YZjRrWMB9Hbr=X
zvdvs4Jp3aQfz2QcC&~No8Sh`S8&A=|ij0R{P!;SXQ2zs9dA>giY%n(@ZFm&OCOxx(
zFPb=qn;Q=-dCh?#kiXN0)-=ob?SErWEErV#OEJw}lLO2E49;UaAl-}nAR_0JLS0=;
z)-Gx+LH=;^#**Auz<5ooOBnP)CzCnT^NdpQo14Nfk50K!NPH9a+`Nsi7bM?2pW7d;
zc{x)g>oj{zByA6XQ^P8pVRB{BMRw=8fq{RHEq57XrD85~dj#AmgdZx?zK?7m#Wx_B
zY-u*uSc%lnhZwvBB8`aoA^aORF_<^+?N^7F<|{!BeVheTA92ItsjMJoq^KlJw+oXP
z{E_%sQ6@qog7c%5Q^ye7?r>c`wosXNt4wz9e;j5f;jSl4LL(cf!VM@r(2&Y<<#Lt!
z*3fe+NY;3_O$YaG4={NFNH7IJXx_a}?d?oa1)IyidmGj>7UHtEFO4$|_%+8dWnP>|
z1iuLP7s>p@T1GzXt;*O}`7P_%Vfxz17v!wHnSH`8YljBFnRWkL522uN)*p=mlpFgv
zTVB)FBex5uYOzyACNR86IjWlTo=WvsIKqIY67b2dF;87IIP-l*Sw1i^+yLO>9tqzg
z@ZR44LW@W4NBh`5|17dlJWDm8((Wy|cwP6rjzy?w?DkQZ)Z+Szc|l_pn+S}N{b;!F
z+dLm1-p3Kkc+Cpb0E#oDW;1;vfr_efBwAuBmnKRpQrH^1L_WwfE*%A!G$GUO-Y3)p
zc42OJj2}q?@ryIBs6861`_s5%r_9u^ptS`qkPy%Q5<}l~v=papX5o=3s|R>q%)F4s
zY*R(}I2lR~4|*(>DE1{u(Tht4QS!F+L3QAKOmXCoeD%u{W^jf4z22Y_a@2*
zS!%(SP6a%GEN1`cvXWvTEHV~N-ss3ufiKYsldaq=7I*K-W!Pg0Y#w_}T@=jy@L5?}
zpV=+HV~}UgWxgxDU+Vkb6zdti?&RbZc}M3vCo?}buR-$mNK#4F=5l|+)~X1d0EsNf-xSEr
zs@m+%w#j|YE8pmmQdDTa%M55jW?1C|(&9f^*J>?`^}2tILo?v}ZLMCN^1WD@*b2V*
zt5&1Ll~~8RO8dN4+RuIMd}uH@suW;X6gsJo*4{KGbVAOLC;O?-4sFx6F3HH)72tU_
z&sgRFe3D`{T*NrBpmUH(F@>iG3Orx|B+C3@YisNgX$YIFGoF>SnmAe#k5D^53G)92
zawk3Oi?*WnJ#>lvt~(8MG0?}>I*WCO+RTv(FORs`tjm@@)<(j(?
z*>&hJ2g1l4N)7*x7k6voxxB6BYgn!=mdK4i+MsRqpz#u*o;$`<7lCRzzaJTgU
zRd#NO7za;uX66WYv2*58c+FfM((c}7&euzMu@EUBnM$5zOnCVGHEKu?9rJ{q#<@;$
z&=h@Piw^HF7O{4Br3wbs6Qo^nDDiM+!4!}~Q#1!=k4_7-`%DXjHLecFpEBeQ)-1R9
zF)zqN1ihoinD;zqN<7=jW6|t$g`cf=z
ze~D)o6bFJh3LzW_W(W^tXDOvi6kS#@G~Nl_7Al$*7a)82LHpa!jR
zwB-O+D6Q&;P9rTyPJ?#ag;-Wgh?&^_c;N{pzYMy@7dk34*-)Q!lkFia
zxp`tTh$6=r@f*{T3j5LlTSZX|Ox>8gTnbq1nan)j%dsdZ5#+i*8gbhzPXtWB_FQ@1
zE#(6#m>Ape5Z8vzjWoG?9FO}Pt`(dc~blER`Fjaf^)2EqpKONmhb(7Sa6b$S#||K
zVKv!vnpp2ucA^etyW3k1$Y=m3l)RRqULOR9!l^f?WGx-Mh2F^Mb1c~EP|<<7T`aHx
zVf+qb=d{7xHAwwDA)I>4$>ElAy$_Zme+{XSC0MrNkZO_VElTT`C~{Xk<|8ci)d}Vy
z{F^{E(qw_tGmvqGsFi$G*dR=jL~GCwdsiYlQrk_mIdO)y5qRurmk(VCz|FxJm;luU@
z{IYscoaAEiNuLOvdUK90kS<
zKD5j2)o3duGt!w{_frtw-bssObqn$Hzl$5j{w7rflhV;InA0wlUmE
z&!0_hgzvUAjCEedl~Eeb|O+4GXu}%Nw!;UvVS%Q;mv%P
zvq{l+A98#)`)M?Ch!p}WGS?P@?Vdk+dH}$vglt!k_W79-ShI0NvaP1G6@C6<4a=(OLy+E((q%d6h$%{~55X;G`Q
z^~~0^JSs67h1F_HWVejp^oXXW?^I{_bEc7eFOCT$3oB0$YEEVN7P9`OB#k~Eg+_-)
z=qI!#S=L20Ema=j3um?3KseYp{QBT`>*A1D-iF|Kz^ME{?$~_HyFPR=9JR-1-u;8;
zT`ItUB#h{_xVYQpwvZdKO-*0ezDl%hPm|EBN^vsA91gf|kzyV(BW(Jeotz-Dv4N$3
z7ml*8Pja|n?_{mHZmtT9K=V&9nd84c)(waBb6
zOO3BeOOC(2U2NU0QH^%`oTXR6D&a#uzoty#$yZHSv*;tmS}Y^xHfpHFz*cEp>&4wR
z1Ui|9qnQRBZ^r7joE$SDO0&0ZB?yuvg>lRUZ;rYR{uVZpfEGKkr=_NRMEIFNtMqu4
z#oAyOt*(M=ucd165sds&XQ4hfhNSP4s2x|Lt~x86POe1=t~sW;fY|pfQRAk}^KHJh
zQ(Q|m<$J8J0V5WExPVuW-@L;_Dv|tzk~>BuE1aeoI&=LMgA3WH;g(;1XrhuWEke*%
zb;f$-oskP!VWlqM`ps_2`V!CeDsHDveszkQ_EVGlwnL?LKLe-nxi#t|Qx#KS<$%-N
zx46qZcSc{K&A%2JORIysz~UpoyYqE5A5UR6NM%nr2#xdQ8b+N0{~jt=bq>Uj_Jx&k
zSCm1AcvId5QbkME(&SSXFw6M?*beb*GQ|)$kky;q2it5h3Q#{M4R0$`fEq`s2={?b
z(o{(5lCqg$C8$2guove^)NemnYc=bF2{KrdMIM$M4Kv~jLIb^L36tagX-Ag1s2B=;
zx8BtfrODZCzW==?zBS+_NUYiAvM-1Qeh8~X&25`HC1PHoIPZz75@)7H*oZD(Edo4u
z77-1{f{(<-SOC(1AvUvNp!yr#ekiCxgqp_t@l(sZ0%JN5m~5-BG~!0%9!~{s-{t|Y
zU-Edqfs%)d0j|sM9_fnI%UI!uT?)(
zQVOoMfG{N^C7=d;Pgr6RbqRJHyeGwcus71nMH8^K@2wSSd{A67QSJ)i7ni
zHK10sE@85Q`n1@v5;uVq8z*B1)C?Bj4FH|HvM__z*oq
zVs$VY4Nqrn-wyEGZS;CI6`Kvkf96OMecB4Pal6TbzFgn2MRMhlgKCn;JK5flU9izA
z)X1R%7>~-xw&HZj_?A`})kCX6U3`zhW4R?y-z_xXuX84MFUMoJa5(h8M7o6bXanO|
z&}q(MB+W)lK3jo_DHL6RI*t$6o(pDU13&V50Qe^R7yGoMQTYIH9(V*N1KnV>Lf0mG
zRJ`vkI{`M&wgXUDgfGt4fuftebjTm>sM^g;R9Abj=yG+JkpLLRbpqMx*)OBnl;`!7
zVre}|$o^yvhTZZ>c6(jWoOMb87NcN!kd2GTnF;2ci!_p3{Bs=KR-1c9R-7DrdF_tM
zE^k$)ku~L>b(~c<%AxZHtC%ObW_3yETsFiv&6tf89-4ZEvW}RmueY~{s{{#}2eWJe
zw+Ii+!;VP;HR3v+!RTsT81>00Q>O+yXw|UyH1?60zvCRU=3l^R$he6JoXEt{q?YKg
zp^=qmZ3e=$dTv~G^TCsL@PKlU8PH0zKq)huXNl~8k@g%2Ue=TDFny<~;2^dJPXLii
zJt~m|J1bspjUn#cBH=_w1?D~VFnGUc6TLq{dPjxgz#LVSW-&_%?G6prI*BVZLW1vo
z*=g4bkeuiS+Pw?~%m}bl1FOus)i^UbnL_P}n6l9!5QGz~L4uD&W_Z9qG=pP(yu4sp
zI(!;S{LeJ=wqvWID2v-I1^Tu2iQ}DtPa_pk
z;@}Q*djP{2UTJFWV)>ehWgT%Xr_*{rfm>_~WU#<|jn)o^SRqIehuLCh;TmDDN)v_C
zx$)+sbN{1&cpDoSMH|y*X*AG)lZBh1_7F?0lf<-;H)}jT7t3N9PrVq|OoV33a#vPSujQbEVMww%4P2EGY)4ve>;9y+AVi9$?__DKoC~>(F$#gj78RZx-P(?D)*3u=g>0w5Dohq^?#$*CziZas}05V)kkznq#$DFk%V8noF
zGeYRD;hkwz#UTHP8NzurF>XY-KG}%5hGYwv+>eOo5DUj4=>6Qq*ZP*sx?&K`_B>x&
z%fhsfSTWr0A^c`NKnh3KNy5QizGk$LmuNNfOrDkw8f-RwV(Is5U^jsEO86kW_>x(f
z{*?SoKJbK{D+thSj8jV+s=!7}Mt_LZ_S)JCfTzPH
z3|zjX0$b{rYyo%9LaHV41rlS7>!2XPRB0AfeX26dhx7sCBt;}wQcgdrwDl!D0Pqkk
zm&E*PM%Zn&BBF$WYPPP~DoZ?}6jIOZ!;Z4%z7|d15`&|}8R%m^=ssC^t)=8_EtMrE
z-3mg(xAQ`zi70-&F~wUvKhLFw3}m5lsLzZ;>8`8!=9k)wHC}bPxH!y5+kv1s0-t)x
zGk$?lug0~7<7$ZMRyeB%C~A~|Olih$xyqN(PR|7?%AqDIB4B(_LGzC;lFVDtvAOp}
zGHOLnmJ|_&c}LF~WGO1r83D!daPKIt2FXL23L>x~3V;>Xs(Uqj-=4l6
z)-%o`us4($z1;7cGW~KcI_I9m+=?1YLPDquW2_qaKEeiS5OX|Y@xB+jL3|S
z3x%HKn#d((gi|~I69$Rs4uv(6UChlgkL{1Ybh}f^gp;4R6*=k_!_-eR8oAnbX3xDq
z$`;NJcbocZ>Q>&Lid3B{`E-{Ev)K3Belv%_Q&*wH7<&_?gB+~aq`(U*F-)Fz+J8<=
zT5@O7gc*I_j-Fy6Gp$>x+~jL2?liJ%`)4J`zH;D=Rw)ZwTftMLTFjp?WAJGq|3ctBwUdb%FZ;F=W1KpwkRZ|-fd69f1y-~6IR8Td07K`u>QUeY}A
z=W1G+s1v5Llp!vO)E>b=bKa5etUgkYH)*OZk@{?i-gxF-RZ8jLC=a{gB?#-Wo>r&D
zPf_tiR%*pS2!gkU0C=Cs&+!OBVQlRIiGnPn}KJc9thd))!;mjCm|h`Yz!iJwQCbc
zb_y0DvhF*x<&wSmlRVDV_4qq%&^CcFbwK}BofaCGk1GK)?KPgO&_^MMojRLS5!85fe^GbHl^-MznUn`|vKbmJN(}}Aw?#(42!=kHha8H4i
z)})Y&M@-4i{EBe_s{2KrP6Y&(ihE)y#YwId1D++!!UKt_ak)t9)V23KMFyVTys4QHcHpijWg%Y
zZeA+HeoE7cQz-Xn!T;V8S@y|yi5YdvS}-=6Xo{O58pl5n&*^iMHzuIl4?PcPq8i4-
z+z3+e;ZVczV}20@w4QboCXc9#hgyV(bOApc_*<`dQ
zc1SFe{VMsOLZq;{i2QVnvJFA1uyM%%*`B%&P{3VVdhI*wxn}0@x7s4nEg959+h(;q
z3)su{84bqC;Fg8x9IT$uEQb~UiTa^*s1GxJXYD6;X7`o3lcPP?cmA*|mJWSV$tMSK
z=vZ=InbZ?KT6vdfYvs;)t1^emN%F>!ZP-1wDSyEKc&-BZcr-G7Et(Yg0&Z!Fzm&-y
z+5`m-1|I_yMnW}8X=#(S%w;neCQ*3WYg$BIIEngo(GXLg6%~oL?9CBCaghg~(=#Ww
zS!FJhzuIEGQA$jXH&&=xy|!K;z*{9_QPjDA`rllqMNdkj*aC^CU4H~3vAOmrTwu|9
znn7fC)%O|;o@#b0L4bV|96l2p&cAL}CK>q7ko=dR`sT%mHM2(GM3_9VgDBHd%UdQC
z-*r_Ju2cB53YSi7W!OBo#1183q75$9H_83h0=0?bc}tdARX8JUVWin6C@|4b7y)!<
zX^32V=Km2L?Ne8%o`6wW>^66wZsw3vjH|5StldxF$Xb~Bhwc2GI`V+7u5R461#6s{
zyELI#A91#S3;5+CRw{2u#eQ~e`2)cKS}>4Dl?l*_^WlvRfi;#g@|G}EMY
z{#fTrIMU^X#3Aj3v-v}IeaL7;=1u+_c%}MxnNqhk$dnqcK_BjAd}Od;uwvCl{}V^<
zcr#D5WmSxbG^A9a0#q)7RI1N^wnoI%cgA60#H3>3S_zQY3J1gvJ=B~~{9>p}af(L!
zK65{0JPcp9K($HHA_`8)^C*PT-SjWNmK(f8N~?yPE2Wg_kD2|W-(z_XDTCFy
z8{LgpHn}GB2@Ezuhz5Zc8@j(f@9(QqxG{|nTYlzoP$z*3!Udw+RPi>Uo6iLBb_EE-
z#>n^juP$DO8#f>1$$EhtaU6jCswJ_Fr-5YR6q$prDOx*dilDzU6-~g{kg7ycg5gWl
z}$(YWA8JOt}b#8RHJb;O8?~qa(8fAAt^JVxx8r9W^@=@LW>QwNCq#hV
zCPKp!Xvrqb6y?RAfWG4XFn^lJ1%J#56Q8<_P5_(8UcIj|!jT}>>m3Y2HY;4GS%L?N
zYBVb{>5_Mjhwt6>ez)J}+b*&p91I)G?>yo5_9Y>rl-Uh*JDhSRab`MG6gIg4_7jIC
zcr$JF&PiWn2FpVeXc24|N+Y;Q2ORM&!uvaDtjv0_P$k#ux<9vOka`0Ud(3gXe!RY3lg;L$~dgPcKs|5E%0#E2bBmG`QvrDqVRwm$NCqMV+CDxH{R>eKZ{lO_%lqn)Y6u?gAWHd
zYSQY-hsEcMN}kT`$;F;3R`OXY5dt?IN;q-9)a#|NV}xgkx;`LXy71a|?)SNo)=8M2
ze!5lRP~Ae?a7vus!SqZiqy1&MsXMRB>F-R>{h{{LUWnmz>f7Y0F+#7$n8?taJN%^-
z0{Uf=^4_0J7*{K!l@L20lgi4nva>?%HvK-rNN?5Pd=Yg7sQEG~6S*1zu>8SU320VS
zo|l*3bp&qPM+7Pcl(L>&KMYz_=!czTyAYfjC$x0k4NtPmPi)=eE%t~70a9Cv0w%K&
z(&MhD+Ka7+EZkt@4I8j!9v)gcwOa5j?|_{ROT6{8;f1gA-W}sBI_cw}E)6as!w$gY
zL>1?Nr>m9`m$E9YX?p>1Hki
zA_#IDdZ%K#3LT#&ohC3A5gt|5s4vZm?snlNJe
z7(6m%EA(}ONg+-*Pq4(^E00{0Ocrfyf1kfv2xIny{F$>w))6Qqb!vVTz-xkUfDF<3
z;{K{jfbDgORC!NG%2g`YWG_ml{gN$)p^KDy3>lU9go#w7V?2irx?5egk?Xgqc$ekm
z*_lp`^cUu69{S)Po^y8&WZXs0ORcWQkbGyzL40uvEZ~6jFgO*vbs9#?VDu@kwm
z<$OocsN#+Q$)1pB87L4vYVr3rt~&x&ab(PCMC7B?6%~g8LAz<&>eO&Fl^>jDN7qx|
zR%Gkq{!jfYvbfdKTHUf(HsYM;ZP*TXr@sA7fi{R8(`2YP1G>Du&-j({ltb=JF(U
z6NDyS-NU6kfXs|C)8(*QFUA*4$3Rgqlu4Jxda+PYY$n+A(I1mcO0=Pitg?{8u+gby
zc=MHFHRJwr*NdQ+=hO~@h&}*AY=J}_xf>rA-TRs}bu~^;RJi%cD)2l=PB~A+PC@*J
ztiSJK0rWtGN%c@Jab-4x{<3f0eWYpq0|nbRoM@M)^QAAVsIDQNl7J5<@fb4
zKyWJ9&950dkq>y%4O0)}TI~Q_aR$kSmBC+;4&%QW2hGp)PXl9FBSyxO%CvE~d>)ue
zXp4~N!AHvhUcN`9YHhYP13FX584VF`090j4Cu5E3LbeMLzs|*={7@+WEB1#LGt~tk
zq;_8%EN^FO2deJU2`pR0d;6YzZ|;_B!~T?=*_h&D`)O1deXmAl_iiMqg}ZjR|6{+J
z(e@!$6dcSnkU{xDfbx?i=J!em+qvOcAho=P`xOKtJ{E#}fUA8HJ18a%npQfQcv~$+N~5bvTY5RZJ=*OK-WgHl9v(h5Hf|iuuVxQx9uh3NdNP3IR@@AR
zwFxD2QC&xTBVpEz^Tk{?`=E3Ms2i2(2Mw`zn=st=tH}OgLVxHbFGlcf!`_=-GntlE
z=GPb#Eilv&jhP5)6YzW%)nCdYLRQ>(;C6~ixeK2$ZT0JY&svBOW4P{DP0R
zK!CVvDjJw5no;Aw-V0>)9-Z5i2+AR!qwGdY$hnSfz_4n_h-0&_o@$r12|*k2
zt4g0)I`Puk+~-9e(lkc#_JYV4YTHq$`{sipid!)3jTN@4(19NHe|W;E3_0n+exk%F
zDrd$7`P>hg
zu=@oRvnXPG;kU^r1b^bz_70I&r-(3iFJ)d6=a(3Q7w$LClde_%s`h^>Z5w{oi%4P2
zkk7o<-I(y*uASm52fy|3rs@9bxlc7zLDLYCbmdggMAxFg&;Gkg#2o!AvE#w&M}HkOi=g1OYgcn0CgwN?XD&AvW2OP_
zM1-{JwqMT|YPpiTda-2*j881=?T6`|Zg1(Qg6!77*_sT^ShkKf6_$_w*gu_oh1h)#Kk3LC`0}k&TQL%5UIkTp$
ztgJrS)h+o08SBT#WHA*rRjguM-<(UXV@E?N%L@-R^V@3|nEX^TnPSiM=9d@9jvImDBtt5t9r0?~!xuQBI(ngoyFk7)ZLa3rE?oLRnt1#e
z%Z3$Q(+j~e3fLj(xrBfXXqFTM0H~sMGr|nuaq2ijS3lYybVR=!_8@}q;R5A)WtM;w
za8`0n*vHV**v^wuaUU`_8--g!>IR1L%#(Sc7U0=UHR#)j@Pl@2PO_)V9MUJ1O15Qv
zaE}vke^stQx?fo{*B)(?H6^B+i7?mK+3*L~zbcw?P-Q+u|Gql^C5&{QOrBfc@lOA;!62zk?Hw
zt@K0!*)AQGj(@QpY#=EiOC|5Ih(c1T((a;rf9pjr`tJB=f-q4%s-UyuS4bLH3|iBT
zyZGrE
z{0xV)Og_)eFq6!%CuJ`%xDJ4$Xj4-ZCEWC!kPY4IE&^YW+v2zHG1U@=6PdKGPP7J%
zg|;|jg_GI;4o7#lN}OGRl#m6$yqr`o^z{tQ_SG5tcqJ0ACQhT&X&UwFmckub?q|m*
z%U*w>T}&!!ECIzfD;VIfOZQvY{DjE1X?XnB$NO;pAbuy;RQHiMZoea
zU3~>)dUSU)UfPywo~OB{5(*&{tIk#6ZhZ=Z%J&}@g%$(sdONaVH_l&AzhFf@zLL0i
z@yTUjfiWm@>luRx4tF5Pc41o{Ep0T?w@vLm%L(ze(SQiLS*AV64NXN^9&0|^!y4hQ
zi=kDhyi`WacK@r_Zg+d-Qt)a+GTvqS<7N%E}FrW9BNo)@Jo9C+TPx-ZBA)DKO^1lg?}=pM7S
zU%2(rJftw7RB8O)1;W=%Pto4B3%fN{R(GJp0W^e3%faVjtO>;*z&tPV3HRV>Vm&Jo
z3ZCb7M$7RV6?fxVdOEI&K#HI;kLz23?-@g?v}uYo^?+s`*U)(g>UzCTc-OGn{g4oS
z>mko}`|VG>8ip5)If|P|Xm*TNuOQPV&BSTLiJ>5iX4)w;9};3Nb}-uWml4})K0g0k
z9`ah^??f4;iQv141?#Redf*3tb*%0t>^@`R^C{g3EUbT~Q1u($7leRYAzI&fEG@Fm
zuz2{mP(G?MzxvK8wljgnJ6L|U5|UZGTzBdhv*;f#7y=ALcA}Vr-tRW
z+miYuWx?D?b4i??UW>VkqpMyze()S$>COX-2U^BzcUmkHmc(lG;J?qyx=bXGo?Zh&
zpi*^Zjw&POSlIz~CX*8bfBRJ8@^(Kc{s3(M5{Fuy=zuZ{Okq_2aIxXD=7~N%4(cR0
zAiH3tPQWSv*TAr0`|DvVzO=)e4yeD`jNc*TAi_M)NaMWORRy7l8@)vqUQg2@i7QM@L
z5FhD~sk2+)<^MlF2}a6d$Ju}0hWjRN6L*wy+O5?&*h=R?POfaPE<|d?W-8ufw-|qJ
z=e_YmrBNZ|nlIb>Ck3`VjCA-k-w9p
z&-;6hm*bD3c1yVh`inbNd%B|P{X{+h4C4aenRYcWy4=n)iM?#k1GQ4i*&AnN&~{86^btcmzdS4PJ6B{JyZl*-eAzoIe-@)M9BS45TYm
z>jY~{joTB(h@0GjfbublkZl9B4RBvLn3|+!W>UU}xmXEOnZp3$;|W_M4vC-mim8A)
z&4*7+lKoXCxdd0-Zt#A!oGf6f49Hs6^C0#`WZYTYI7l)(IY3und>hIXL-}ZVEx7dG
z(bNb&qo;qyOBI<9^i}rc?d+}AY5C6|9hIc;spDqkKa^}z1V4RQ)WPo(Wo!yMmxUHo$352Rm64yotr4xAlDo)Xtc1?-AS_In-n
zo)t#B$`qP&CF>^8Jjl^pe%8Pn%ApA$*$7gFgT$xnI&HBc1Zk6zs$@&DxCK)v`TxDW
z@?-&6545$VZ+(ZCodUyL7ej}hJ^meL1!c-h7UxK$K|fCOymE|emX}N-Z_$Lig#}$v
zvD0rP$|-U$X=Vx+09z?G&I(Z}A_Ek-d8Id7U2tfP9xATR_lA`HDQM1?S7F|ky4T-i
z*tbwJ!y!B)XdM>qw+7WZ605{C%MWfgymfDJa)r3!gxwV_Fv0uzNLVGQW~9%Na{OBb
zfbB2a-g1sv2)9;o)O{nlx|8mHK!6XlFLmc3%Uz4s{EB~_{36wW-7Fq=(~TtaN?4EM
zED+yfuOe8l&D0BB5as7kgYsQEt$U>9?x;fsy+O)Rg6^;ArtL%_15q%3qiJxj@+FpC
zk%yg&s$p@wzY&RJD>_5d$m>ZH^XpZ1&&Z~st^SYaY{{4Hevy8EP>@aI40)+s9%Ur^
z&yC;~c8+NtAttv?Y1OZI&z)ja&mj^C-Rq4$`2KwZrwhoTx$uztF6`QhVq$*(FT=L>
ze}IF9z@2ejF{z*ggMmZ4wzX}=l7Y2*S6Gi9dm)}2C*BqGx42J?hT0PDx^r10U)a_Q
zZid$bJq%`%vJzm`T6HC<%<^-SOh=JCag}X{Zm*@+;GjwEExNh255(6Q+gs1V@G>0#
z2dlE?V`?3AtD8oVA0P@ya64WR&WH)^R3Z1fp6cWD_aq
z_AiI{@G#ouH%UIVT7c6u2M-K@MV{wWTc
z_75dP+q1Roj^(1c_AqaivavHmSYKf#(8mnkuqyA%goqmpe>9OUd~PGiqO#44Di6v^
z2byTpB_cHs5^`UrNIF#!Qm#*lY?sRcdI-M=&)4!=y$oju=CIeY0O*{6C17MQO>{m=
za+r}%Hw5MB!iNyuq$1vUkec{Yt#ZbP^$)`(_tA|sCZ6hgFuiog=#qWx=r}1Z^7tvc
z<)w-IWgD^iOBw_5J#OS#B1&i)1PhW_7euZ!!3Ah05H~iyoSZj)`2tr2WDwsrg&isF
z{c>~M)y!svwzv`b(zYC=ANByPu-oSiqGaGl?~q3Vr#3GVrG#mDK3B*@?;7O&O?Diz
zxWxj_LjnBN?h}ini!t|H&p=MO#X$TlZBx4Mv6F9)qC~lvyO0TDfLO)1h+225luYM
z70;j-M4U}9nhZT%--Q)p1ND(a*#kFNfH?YE_o7zV-sXhi^ps{0a=4d7y6au*kETlA
zX?k}_|K+)$P{sEnOEKviONk!MD99%Zz(@lkRYu&Iy`m5fh4c&BJNYUE+OGk6!&^5I
z)Se)w%=$i|QS`3$cWg_BIn?Ee@P_d+rij|7erIv#;|0F=VsuI@QH@{fMM=5q2ChFy
zRbJ%OS_>GlR)f5x*=HjorZ-$xy
zKxDTPGziziMHTGL-|(KQMoye`3FtNj-IF;V>puR$DRLGcp(z@@bnS#X@QX~|MCDQA
zV6V$h2*;mrn<#Jbx^Bx54D3o^PsR>^zjE3|B=htBRHQqNZ0Iq~NVMjbw1?2I#gl1Wc-Zp$}xZzKE@^VzY9C=sCW}Mm*2P)^3mw1}Q}yTpa^B4YKEvV0#|u_^-uz
z!wwsa4yBjeJ8H;~1@6Q3dmpf&AuznI{oP7=PLb{-W
zD_mX*f>?M*K=Mm%MF1-lDFz}*Uf%LpYH#fR5W^q*%Udb|k@t6IkYhdABosMR2x67-
z^`GP%Wr#c~feOGWNfc~|0gYg#^B`i)M-N|>Va?<}bTS^du~2G7ir=_;f(QbzEL9LE
zHfAQ_s)CN5xi>nB?=0u-;FW7w%mQ?)%AR-E{HErb
zyd#jE@xXv2S&Ah#Lq=QWhSJPJh)}eO2l3QbPS`5ghvoSa&BjzDhsaoi0;=dceD;4a
zL!8iw%NX8#em6X0Lc6T76`8_kbgzrEA$9&eh~MQlj|5XqSqU>)?+N{6800
zH@l+FK5R@$6-wHw%2(NDJ_Z`&&-G151JWI58tdmEHnR=B?9ds=TVT66+0<`#x`A?8
zo*cTB4l&X?(e`M|<;?jDO=rG-Ng!As-8Gen?`#MgYxU}z=HsL@GV(T~ZxrtxiJQ|q
z#ay>jM%{7@xUMeTCpb&>%CiVhT#~$=U@fs^Y1yBHE`0AwZZuX0OuU|VHBxN6W_=6Rs|p{hD}nsKF#SLr{Ulb;l8|
zvYus_CU+YCv_}GHN_4-WhF9-CTBg%J%#z*R0b%-xLv1`&t2o+=5Iu#X`)=D#*fx5)
z`wpA{s?d#aH#@I>>nkkUG{4?i%Slv2DAl`U3k(TSSGo8f(-}kE?iCHI)Bu_0e3Q6;
zlD~Myvi|M7wbO==z5S__V|BWYv4FgbYm03fMA;n$laAjhN+96$1c{&3AhtgJhSS07
zN53Gg4e8wNT_TZxW>5&KzYQZZ3MJg=ajZAayaAMCtOUNpBQKCAr!An
zF>Q5+>$L?@bH54H3(p2bLX{Re_pmc3kGOSjMdHeciH`8GRNzNODG0I<#
zbdkp~wQjPPNMdNl+2eN?#5m;>m?$+^qHYbqx|y6Kg&r|Fe-p~5LnOOz^f4!9UnoEe
zATUpig5XZv<5$dUWd<~KOx3TAqH-HIy|q<`SvWC((;_g#wAcHa3i3I|CNFYx>=A#{
zx}%BNE&0*@=!^o9x)=nTFiD?8lhU&%XPjqr!+hODfgK$@$dlqqb>&dk7n
z>2?pVvLH%$1|OeX#!|Q+Ul)QdU<8Oq4r|fDs)6??8aybxooH~}=e8s}I$S!POea*o
zAeTIz0x+l?;_$)VJffj7L3A;BlwKG_@z!dC&KojWMG_y
zAdOp
z8#$f9Ye{=uHQKNgXgd4h@O#sy0RuNL@1|Z3UumsYEf~hTmN9-hIRB->1=|*#8BFW>dA9ELrsHumG&*!0Gj91I7@pk8ZtRB|EZrPtqu2if
zGQ_t;??^3~O;$MLg27)#6%&g7t>|<$PmBCq)9zLDx24_hQpr}Uas2={%$-Fw@gKVo
z;jBmD{$RbT63vr@Bb^$Lmz^~!QUc%AT(q+KGD>>z>4^pIlR-Iecp4>%i&GG7{EsPe
zu#-yP$N8_uL8;(R93~E2Cs?9k%Z#9rrZkc9vrn;B^wJoGKZ?o2
z#Gt8RVLl|W9oNI0kkjuwS0ns9;st9az(K|r~KWDK$LWDw43BS{HpYA&ba
z;gdrbS*#D**GNMwB5*D|>UhPb<)P4QWag;02#CCJX?$0|D>4(t*PhZEa0nc1(4mK`
z@2gU1=Rai6;97cSu<_>36joKo@fB
zFQSjoaQ7*U?JqOVOP2TdUav7v=xBh?Qk{^fE&w~9_nj_OaD`uhTekr_Y$hA7)K79|
zhJsjXMU?&!qIqBo)&Rw$QWZ<*y-gWQpOZhgat9xCc~zB6B_)}1WiHRC3qg(Lkz
zHP15&$JFohoQni(-VI6bp{5Wyi(=RA!N|ewdp?X$e2VaCJ;*%WF+%J^tfhkQ;sCFu
zrFJN1UM#tQl}2arhKx7cqxoTUBf+E|!4bAZkxYl_3Lly+|Uie2?5<5y1nsLn5v4XCDJTL1J-a>o`o`
z?s-EnJjc2TW!DiNBCWwMfSiW{oeLz-GXz&Kyiy&wnrE<6NRuxB@!~Et$mU12w%aoj
z?#Cj3V=inGx24ms3trxlZc`t1xaAI=z98IQvAYT6MB^w;6TC-Np4}#i+YOCZvpV;N
ze|Uzwq?`x<_b5DY4;i#Vi*+g<@^@Q8rL3yWGPJKZ9eHD5W+G0)xtArLL-zxw7?(g@
zw8Dts))JHt@hqYo$2z$aVnf5!PxWxZAg#AjK^OK8gdX+dX;GgVQdR6pn9AyTaZ;uY
zC5pMaJHE8itT5vW<1^`_)98E6F;L@Hqom>ev{ZT&7
zd5Xu*)LeI{!*a#ZV4OPfHlptRprk~QR@s|-+
znB9MlA~+!f2e_59AX~K9Pl#PD76S$hS-ge*=$#7^4dqPG<2HOwdTG$eb6g%S-oKKu
zNU7aDO9;1ftt>2Pl>CTQ;&e1-JRvht3)(Z4vW!G@V#?qNcyM7t)E|huw*m%y#}u^b
zTq`Z`&}Rf8vFQU~F)OvGtTgr>3ZCl|hqI?5mI}}(dC-7LEM#&*DRbk)m8{r``aIUl
zID(!H_0TP}Aqp)bwbDGCW5^dse+Ej`?ie-Nxq8iDcjdU=A3YKAF8vQ^qwwd0a&DuX
zGO-d{seIyTN_ZZSxM4Qd|A#Zp$xnyykr;}D+mpZ4YhWpI4+@F;B3Roir)OumnC(BH
z4uds}UK5L~&&1Y8oh6PiYx7_e>CbWu*dwj2}}hcwuorA(f2PiSZ73~
zs74d2Mmf(La?WGaPWx+Xm`z7*&kFg%)rnZPJz?gZ^0ITy03`v)5tYdiB0w8R;lrG-
z#pA}MBgAqbm(*k`B@^d^&iQ2ug)>>}HZ$@N9@?dyqB9|2)dSAQzgafYJ63IojHq(J
zj8EF8rp98GQDI3kcWFC>SI}419d_8#Be;olD6wz^hWck+FsHUgLb`@{$@E+z(7fbL
zTm80K1qk{g?-6**?G(&l4E{&wCy?7NptCxzT-5KNI8X?1s}QFzbVoEzr8+b)+*O4xTM
ztT|Bi2`v!;r8gR{Tx1>3k#Ikx*KCoY;Xk@}P+%x*pyU21F^beI(G7LjaJ$e(!;sbZ
z`8s8QsH1Oyjfa%Xs;C#wwyG27uZCYSl7|Y!K`c$BD6{m8g%kgHvnb
zrcIVLcb}%fqG3}^bVX!QJMrq>A&d!5ji3IV!g)+|{xvOj6ix^!kt&1&zUTU^BzH~*
z?bwGqMr_-xCfbH%{JG&){B-OmIk=GWlpAqC8gg?6|EGg?V1v{EBQR5~lyc2zKY_4w
z$TyeZSWb5bA2pJeB3GH_pOrMo*>f3l8hV;3Eo@zs`akN!I;38ebn`Clv+5A6ye$xf
zFDhsLri(Lh0!LnK<$u-LA>ql_6Yywv>|)hP_W4!?b*-y87b_XvQ7jE|f{A2|2Uf~@
z+L(nJ{l1iF_e}#N;e%0LcE*0F#nqZAw3ug6?%oUj;7TIq@65^9wnZGA(vl5M$^MdP
zxc24lmX)yCv6=tshz0TE5E^cPXJC0G=W7kIczJI)8HFS@<;muTf$#q>BnTZHWV|y1
zI1Rf)YYA3<$P+LstH1dYvcevcG?gBieRy0~RsYi@GhT836SmL0cZ>=Dc}`4Bw!wevRQrZH4GgxC7*c?q5m$1LV|Tk2tEQZ%N{)%ITq21
zjg&!IWk3FF7`_=DZiCwvr`)>f8N7%3AyfyHry*B8hkcTk|I^whe=#**n}jWxv2SKyA!y@N!|4$2kXQ%+H4gwkLnxR9B?}@!K2*3Tc+VNaqK2EEydhoc5>;z8G
zh~SHEZKP;%8hqR>O4YgIr|wIzmFIZq?U3dhe=k$1ZB)5F8k8NDa%nsC|BV4$mvLi0
z3}-c|Hq&HHrle1C^hhVcQ5G-kJ;;+Z35&7nr&)%laa`N&5~jS6AipT)Y|9lTmyM&SMN
z(Wn#kSR|^usV{Kma22@4t5wvKjVJh$O$lBj`5{Gg=$pN%lo^t>l#&%DlE6aBZ$Z9a
zA)VbU3iz{8+2v&o0K>%+dhw#K-+E_^iHt4ziN(PZ>OslRysc6ZCmd2dOv{c7$qo@Sz{^uHdXjs
zVG{>bZ|VD^%7{45%Vr??B#HI>7P{f^61o4gpCp{go1Jj8Qc|on3ev`Lk<*X%gb=%?
z_JvF@_1ytstUeGa)_gmdo1i5j#96K851(AJS0mwbyulBO>$>no-ORui08v1$zk~Ny
zOr9uy=B6G;|Not&Xdn*`nftY~6OPh9y64z2w95=r=Jk5%5-T?zSPNt#+2Hz!W%f5Sf|X^4godM9aTRbjAS}FJXbkz~!8q)<
zT?3-j@-EEYM*T`;**`QMI)f0YhyK0Dlqx8JTRez%NwHS}b*LAo6
zq2i|}5oKQx5fMRc(k^vMC*$|dL0^Lh$;Wz_YSQ`{Jci8`q7|dkeLf7n&`nL&Fy<(5
zW&cB(GPryj`Tv`J3?y-|KOOYHQn|TNcDW&GiSm&o@s9Bu_)Wc!-I0w6UppEqF3;R
z(wF#iCh!@ULR?Zs_BeC6q=1Mmky-#0%MA7-At<{rR2bhl#rWR4urN)~(l$bafMdna
zKx2cGji>+u$FQ+vh6DvEDthg0tY@|Z-r{=Du(kG7J&hwSFE(4NIb)QQ`rKD
zm7*9ToG=k+{+Ai2Mz}8_Iuqs}ZHIjVv1v|@|46GC)WGI9^`#>8t2I0SW%zAzDcq_a
zIb84C5nW$4H?>?mnX#wZMn}gC@yyvjogQPRR}k}aVPp3tUj?rKV%en?VQxm%SEQD#dY%s@w8*a8v|ud(<^g9G8h(+Dm~Yok$wRc!{;=MPO~ua5xTDb1
z8%Pq`28$nn?i*wff3(^X2Bg0RFYT>A;1z4TO1dfOKDc2=QF0
z7lDRAe#i;lH^d1*kHUOy9j^5Sr1$xf;!rX1vTQtpNJWE|SsLJ(^tNK(xlk
zbi}{2{-1bdA6ufL=2|DoadPJbt+zI|m!sN7s?RKBvAFt$-#h73!91-Rb<><4p?)9(
zi~yMezF&=a7|1Iz+|zZ2YL5|EHuQ8k|MFLll>r+}k1;pfcX{!(V
zh$~O^8mt=VK8U<$EUQkikKCj8O7)@siqy!?>~}2q@7`bm3ruF3F0A>eql_1oY#mj{
z6DS|==yFze3drY$cld9{*S!&Ea@0+J&C+)pzY5(Zc+wXt=6fq*(;2UNZNnG9@w&$K
zDN+vPaFS47_CjRSH=Wf6Px$)(SP<0Kh*7iT?>#UkcJ9w;l2aih!Gb{xPVGlIujusn
z&vD`yka7-Sh!X9(3_oOEt%Kfk*x48AQOmog4ge#q2kyjJxYV>&T<@ESYGF*{gup^K
z68!B;aDfk7giPx;U^Tbaou6~SF_Dkn_gm-rfI7UFu+SXjS}c=maa1Bz}
zt0m^LD7G&KFMwg;H!y0_MX*eq|3qZa1v@Fh
zHl+P!87mUp-2?7&5vG<^K3>M2
zL9d4Ab7%**IM*i91Yvz7SMGj{Fq9Pb`0Z$QEB--
zbNq+YWlsj%WP-*CUV;IJ8n&pbIYQdKfWH-R;gA5Zi5MHfw-N>8kr_cw4r6Mpvxn&U
zpsX40rz>QRvNK(?P+mqTS64o|)1ZNu3@X78bH3E?5lOGPE;bkSFab$M1?NO$`;t=;FyN5CB12i!W
zlzr&Uh3k-$l5*}mq(b~8!#L+V;QFf8ID{TT&!vh%zD!fL5=u*Ru&UD<5qg$ZACGxv
zHe>cO`geD4{tJ!Z0hvPt-<=R;;^I!Av$b3iqgog3CmxTPS2qlx<@0m|fW<^Cj~|jo
z3z`Kh1wt~8wh-Z!B}jwol8IE*u{^Zgk@LN$xv!Xr$H$i`_CB8f4gs=JZssqwPne(a
zGk|7f`Q0PB0sBEk1WxYDgqV<7oD3>c7<0(?AVu+v5jDYI*aVo|*TutmTthOU`6N{f
ztZ;+sDHovt3to#haUv?r=7sEm%OleiS1Wh^XLs@FP_E=Sz9I%^PE#7-LEmn*#xkR)
z-)S4e~0ylHIw9rj6t
zz7Jq=_PXgDAFuvNO^NFV4r#RsVAEUqom@tl#jxA&e-$5Q
z3o3w~Kx}hfuCaSvgulV%E#-N44DPH*INj6BkXmG*uiTnlBIr%R&;`Qr!j&
zE10+PN8fIWy8YNLpwg`ZP9v_0;WI0<%PCyDTD)`l-E$R~--!z%m)dw;j^2Ekq*WNd
z;GnJczjW+PKQFrHfclkrdah`QZF-OXv#l;TmgkTU?qAf9gkRInAYxX=#y@teA&YDG
zV>H&Rb==tSFhSziwjFQBfPv}$4X}~}ue4U((mftfIz7NJS&!gJmwCLc;AKi^7o&cXTGq+DOvYayp@k_09N0rx2cvZX5)FK%R
zGSCGbJBKly{lOHxy|h0ARR^Fc`0_TN%-o#VW*lWxwn1Y$fw|Kr{<(Z0GGGhrYXY!;
zLR&D1H`yYpeVz7Zrs_9bY7lpfQK}_6f4Z8<+Z;Wwiq(LKy2o1hN#}p&z{~-!a@jC?
zC@vGyZJMOlQAcWOekjvIh9ldiP^&|P(MIi0soxL4uP@YV+eFO9??Z-9kYoGnkbXVB
zNhuaYW}c-H*2Fq{hmNKz%OaAS^~1Q!k)#Tc+q{mO;LFKS5;W;oj#9dY2iKFu!6-nX
z$SGyp3H>c~;>e70X$e)dM~$~OAv3w6!^9&*V+Q4bBq3Qrvz0WEB2fbLiOqxPf;&OD
zFy)C7B#^unYkZyT{H2~^j72oneR?ydLMcQd9%NOLkl<0)@@0^
zTO8A%c+0nl>>FCM^?A9y$0h1(YiRt@T^0N^kw3#$q>&*a;PmT|TgtcZ0?#mkceUYq
zy0457#-gpgiT^S~LU3EhpKt}Z`S6CuZ&XrEX^O{rt&P<;HuI#QCsAsMWq?G~Od5gB
zbKVbTJw6vN-|NJ^U%$OK2}7Pv0(=>za|XglA-|D%}51`d-(#sx%Ksjxr$+btC4~
z!b-oaS9cRL(Vm7uXO3R;h-R2L@W6M3^ZH(AIS$BlxyG{oM->0~PJrDGkZ?txdB%{2
z#1B*C?T;-5TMY;%ajZW@ngnIf0M*5h;u;%n+T0!%O7r=eKDSaGK_!xgO|DGVl?l8j
zXA%lQFHBWwo_WMR`1RCH$&6G10H&EmjG5_bZuw+>rv_L{otATsGQAS
zZlE1igRMOZ2>Mzr#^lPliy=P?Vd-haRW0YCA9e>t$iMcZR5$%Z6ZF;qUq4h_&|jN?
zoU$Es8`H|aO;3a(OOg&eQ9F~BeBfRmpF9S4K$kj228WjXX!CK=1m?Wb5Y1>T+3vtR
z3~uj|on^hA7-^W&MQ@*&rp8M(K6U*Aw-)!qzr~&$3asibHm&-W8!!;~2hcX65eq7#
zz=t}DH+HXQVawTmng^>S{z{zQNawtllCbQIvZ485St==77&X%T`Mikm$h0sz99j{1
zGMXtnGXFj!5b0veK!#^;op
zWEvz#Pwc0Knf;|xIFpUV{Mn0Rv_;-yPA<^cvcWb99QS}|%XqtTf
zN!)KhfzOb=W%}?cg3*7yyoEELhQ&}u-9rlPwPY8LVE!yRvMvM$n(K6<;YHyj(NJOz2uF=
z+SC?4CP0F%Go9HYl|-aWRUyy^9%h1Y76|9|h(Do79XCN;kUPF{TXdgR$y-5DLi|pi
z35L=?5zv4hK@9=?G{_zm!He|tfI1~VW+o?|n$ciUAI*-(1HD{!K`QX`LT4P&?8LF4
za=K6J8)Ek`CW;OOMg}T^#-5mHwpPlWX-a$NzAs`h-mr@0cB6f+e8(~v6{Wa_s1o3N
z*a2-h@NpQ|p8$i|20_3qs!u;BzlVzy6~AMnxocRO$V?M^?NM!V?X(v+bO!?xr9U{J
zELF*{;-V@)$`cJ{O;>do3np@Q97eJEj2v8=%_H=U_6xq3PW~97+gspjxS+7P0%_4f{*r^FwKQy2g>@1UXQUv1k;
zso$)Fp%_}3^3RaoY?*>3@u(FTo_L}ACczRZnv?yobg1T2H(;}sm7c!c#6=84iO%XF
zJ(v*idIfZY41}hFq5)?=-yLW@3Nj{FI4M+WFQ0e}CH>o@$xs7ua9Gp&Mxm{9*NPay
z`?kF5y2G1LRNkdB%_2hB94*LXU$MWGie|rp78x$n>0s}WLrZ?5n3e9
zjHA4JZ$Lj>ofn^wAc}xKP$)VNYHgd6ib&!wN6t_Qp9hGsl8W2xwIrYItr*=}N7z|D
z4Gv9J@x(x_BYTleDIR=eS^dF|afqHckKHhDRNWYrn2)RLYjuAslze`qbTKskMVmT_
z0Uk}g4I4Y~0Id-Xn#vHfO1CoY|8r%^c8#5tGRunB6z>VHnF9KLa_w%E0t-{q*8PI)
zJY6S<%Y(%2=8ugrY?Cii&vhiRC1zP5apT0_j4(#Z!*^v8&dmQ_IW#kS^?g`7#VCah
zw3cWh84`K^lwH)1&huaag6#u-i@C45VC}utt6XFYFtC+_d8O}Y6Qd~Sv{?;lBNlOg
zm!+QXXQvGVql(uI)>dc5{ZbBouG9COWS*~@T!UOxmI~SEOK43k2hx>St{VT}EOR?p
z`9#u3RmbbpUA?EfUa$at74r6hfRc!zWRL_6+m5i1JfRO_M$t(pDF@!V$;%Jv^2j)?
zSOrzP)VV^A5T?9jybm`&EL;%Pec|d2sC=au0o42`myag~J$#0DbxPmkG{tUA&7Z>B
zs>D1L6+1|#wCpi18yd0~dVhj8eLFgXcLe%5eyYs?^pcsXG#m!vUdf|ClJ`~Wj;v*|
zKQQkv%c@QO%YwRDm$k(I_gpzCGkthVlWWlC!&K2tfkLd<$eRfC*qXWk*C5?dgk)oG
zdkHXzkTeybEui)VBCMhbYG<3PihR-vi@_~j>lYiTVH=7`k_%jp^+uGMrV@ejdt8s_
z(o#dIe4o)7<58=yJD(+lFT)9ukGy2irZQ+B4%URJP)dDwZ_0_jPBJ?!2`7US3c&{h
zzM+Egl-{j-nx(?AoL65=8rN}D(kjHBZ4g?SS*t;;)B{P$0*|o@N-8td@v?ESFEk5^
z$;srnZ*MnX(bT#GAkMFlLV_dZR|INQ@ikv}&|~!hb(Na7!QAw9u^fLH0_2@$fz&rY
zJsOtEb<>1)NT(?wO>&Faep{BGLe!a?k(w=EM6G@*5|}_eREPd&eW~?fzi2zOMf>M@
zS7@^_-50zpYFDys+J!*iRaFUnH?Ap7Eu#N-ME>Ev#zH-Zf_^?NKzME-R
zwAW`+QWXRz!(y58Tg?}D-pauz%n8FV!v1cqEPK}tdVDSp1AsL$cLu-*DpU55Lhv9Y
zqyaGzIB%Ol;f^X{nB+m^3@GFMQ?0P8*M%^^{!0-S19Cx?@cr|ZaiqXhiG|J25Amzt
znKSX-zj4I1OoXTv5l3Rf^-di+WbdAuqX^37r?f>-Z&Mr+{IP~tN{dp)F7`pqI)I$A
zgOwx6uPUY$501{?6sr!Jt37FuMOE}0Qw4q+N>uhpx{zTep5DdK6k(ALGKhX+FKp6L
zx!lwZaVzm_Kv7X}pV_dmtrt*U+Ol@zeZW8@MC
z6NJ0n|*aPO0-R?EUtcldJc0}8*L35QI70GOp@|W
z8L#9p?wnK$6Xrj#LYN&BeRadgXy0M=*L`G@S7E|oF>!^kpEdW=8wP7KPsM$=q<)K(
zEZWKIe4}wvF9MSmF-iJnzX}i-LNxeR3Q5+Qd#DY2GYB!`&0ODk#8VD9HJ5D;tmG*OrIiEdoeB*yLiXFY-gW7-B^1t1+mq
z@gEa@1<+n7`}VxD3whU$fr?2MOO7e{bWi?uCRrKE+yNBWlPkWspD@Kfo_GH5eTiyN
zbt|*f=G^N}5SbF|jc+Z95yR$}fQdoamk(2EST6_85A=G5<%4N#o21-GYAvKb7|IrS
z9Dg?@J_CbUIkNN0!b+X@j>WmIp`2;mzcKDZ-JltFZR4u^ox&5`O0ddWq+LE
z@0T=&sq%1KoKLj@EO)(^dL(4t%2>CqM-Al9jG4qI>DJ)BL)A=2|Q+
zpr8{p5G;@~eW4+~h^{_ur`>nYy_N%fknotFsot|jMNP2EU3KdUU~m9|7`&e2B?)bK
z>RY*g#gw=P>27`h7Ak#r^5tfZLe3Vhl&I&!*4lwesAXZET6@I8nMiheb0
z`4Wotk>VPzZNcMg%51BLN}A3F)Ec~gu^EWtpctK1RSC@P9T@6(N~?%C3owL?D6Q_O
z)q*$G#D5si)1-Li2trqjJ|VCx#^kinj>h{AvBh5GTyKGQ`7+1`V;;@x!H$~pgQ?zjj7b3&iIsu%P&aS!gJUj_ybhr+h_(t0W95I@
z4@NkYt8)B7l)}U!fCX)$wb8mmxA(>O7Y2-2(6c-r4EXx52}$pld?-D*SwVa&an!&<
zN6*g~_(R_X8UfdRsWHHI@Iz`=<=%vv>Hu{~DtRQ-eu4o(t!%=Xz=FQqdqCobb8wKf
zaGuuKOp#4?rKQuO+ct}Vd(3jgE>0QH3glV)eio~ik_i|o`nlzm1zjys%jDsP+@aZD
zki>j{48@+@=o_6ZWQ?Dz^AMn5NX!de4!~4X}us0-U+ed}?iJb(x!;5cW8iN)~9^fJE5(~iwVbAbZY)Kh!awv-rSNtUC
zDro82%o?u3F-Bx0Vqco(<6`Q3?4vX(H1Q;Y!2hAn&uZmdh@=&5MPp&4
zYysPVU|{y@7S*)p-%VLLizO2b*D#`gO&Q72^+B9ctS1H0TA;n=9|is?(sR|V(InWh=3xxCtwNY
zOWI({dy^I@mIm8EM>KSGO~tq
zPGI<(Rzw&=fw8us8GsgK?y|7_ZOkYIg%0sPm($rGVjgnj$|iuxE(VIV_X
zK!#HfQ~dR5a@l-GoXhUXkn`0{Md_Yyx~Od16G^m`!OBC?pw#^Ss~*t7k>^@C>YWYK
z)niu<9JUQag#}q7*6{6|C1op`zmL4co&R&p{8Ww@tAb(*^@sSgkcf|jr!%gSiy;Dc
zo5EaMBO>(F5#)%<1u0-&(|3-QG6hTY@J@{vkZTr&l%8t)#dknAb^m!gYav*1B(eTY
zDC;P@^2`X0C!@K*M~(%@6D%(*FT!=XvUGx@8LoHSe_%4+5^`-@w{oaypbfE*f99P#27>-9yW+
z0LIkpv{XuVI|+SibkjZ6r{xLlzE!z{AXw_&CJoYlV&o=(59EqVRfSq~dTTF&RS}f-
z=!BsJ16^ftKgBof)=S!Scp^o7Fc(+LZb>p{gd&O8YCIr<)P9
zhnBtI`!*%mTn8;-P}S&a4r_@)MFj8`JF|=v^N0*Uu@^(pYht`jy`*fWET0fnc=dX9x6LwdpeJOtvWF(#zk9Lts{;OUQx1Q1*Vm2@NSl4E<1+)Z`q7qLThB14fag(<@T$eB6QtWz
z%rv3nF1^2NQ!i4S1l)fm2Lihir8!m9cQp-jwg~N)=l3}~7&(%egL4r(Kb0`_#7QKg
z{_!TZI#xk@hbU699|Q3aUGilUXD=5gwQ_VRTJOtB2t6`V`DO|dcKD;Tjs)N{0*i@j
zuRAq5zRH9OjTcL_+s;JrE>5?tWFuf#lQ_wG`d^|H*-1gk!*37+d{GzK&q@`3$JVM9
z%8}PZZi(5{KZCJ+yE>#Z;@3Ahp5Vd{MhAVh(%ToTT2RC8FXUT{XvAVG0sz7}=G;<~
zw)vZZEG8@c&~HGd-sd@xkms4T@ML{B4LQCQusBG#Kk>1!u;h_
z-8AC5Q$l@)l?o?L3=%6gHB`g8c9(t1*I}%gU_J{7$OQU?IO)dlP?XmKB+8=vg6Xerij;eBw+tO2*?k#MUh-Ja;1AJZ*mJ0Tog+#9L{52Mz8KM>fUT}
z#yc2@0o!!OpL5UgHxMuStZb}P$a;dBV@G!KlYpvxz!_Oi$0E551@czGb|KQ(za3y48F(-GHgFSkZx;9s)=yy~BJT1cukcw9JX4
z%|Ksf{vLB9Jw;TJSA0Q+YZ8$Xtx5%>t`|282xr-?zq=0^f_S`0Oec+-gSlpc4_`S
zbQ@C*6Sgs@X31%!xOL81BQ&-&z0cW3co6Pn9D{!MF7CvOV?HHi%ZZ{WG;6Md;a&Jx)Gc_p9WZt+{4jnvt?;(VysMhAl!psZ$5|A~}GQifK!PdWm_j&@=t;08r
z6T9lZ(H;u=31P#b1w8oWUJc$0K=~l4TDVh`PrQc*tY|=)Wt7do-N{whl&e+40`f?+
zBS|`ytHWCC8Ot1ShcfGCu#Gqph&+W1%UkX=jSiSpwse`f9M7H120MR1t=P>L12X(WlvRCk-A
z!4&$?4$v(PO{f=k0}C{KuHstV>hEz8iy|%&v_Nh
zf!FX4tFbqRx=k}wex%(JL-Z62aMJBFxh5u76jyU*K*m}^*GgRJ?iUhQ5?uS!TQ)U*
z-W6(_`)K7T)E4AiylhjE@wDX~8wh=i;veh9C6sZ%fx9Tw?zIh_Guk({18CH`Z5kA&
z26-Hrt=+1D{^B=)V(@1=0qws7r4x2GUsv*mkbN@9%msjfugE{GSLtZpRe0y^uP2l<
zX%7)KCJnzgb>|&dFYMd!rZz_Wr(h$owtM}*>OF+(L*y*>jB>rCFBL|GXJSf$5?dY5
zURw%-%(1Eq(yEV6;7Vz71-oPw-mWFMjTdJYPLerzn@<3KM1B2sAq4||0t+k7zGQ(8
z0qEK?#zLz2iZ4&+3G3#k#wDzM69?Do!#S0uTNDf5(MCLJKZbJICq4Ap@CFMij-Y3-
z_xG{Lo*U$`Ub2H26H~vg8upIr-qz8O`j{+`mgHmsK|C!ZqEz8^r$C@UWYpP!`Y%Ax
zlYz)bjS_U$>u7)lSis9#+KWHC7|L
zCedAdt9az@o#01`gR2CwMEvv?)8F4)11<96AdMdkNv@paYtgoC58p33ruCs&y$=HF@Cp>AFj3L=W
zq~XWe{&L8uC)3Vo^rkT&$b0*)Y4>Q}IjNt?XipL2-b04=lya_wC27@-dVP#4M^+iA
zSKWKRW#<0LwQ^mzj|nQRl^K<`!PILw*Eb6mAfTIk!RpLa8!z+yV_uJKy(3yr`zU1
zDUaC~t6jxd1W}B`59E-PcweLgKD{5L8&`^9=bpcfBtxM4SGgo6-OF-n2
z4bMJyXxJgHF+Y$p`3bTP(;4{neTEYufHbh*`Qb!j2_yJ&+Hy2bc!he0CLJoAGV9{q
zqBNnA1b@c+&N!HOQdEu<2mVykdA8y1UORuD!5D2@aP_p7kgrOjxb73*qF&SEob&&Y
z^zWvyggX3QZdsM4ecbY
zHdqAv#l2U=WT*j$;zCMrJ15dngAMvKO^T(Hi=y8=JBCuD(Vd6cp5v^m6`sowBVD4W
zl6#I%v<2G}Sd~7m$*O3Ff#jTI6+?!sXVZzuC=C8YEps_P3o1GZ`VsQseYTpVt=oHk
z$7{arMDD)ub$ZJrK~<8z91$Q%+4o2khy38Aa%#f!p}yG*D`n=OlO(#JKa%-9=YVJ^
zS81c9(qn4MCV$r4!xU0Q5$pp5ZVJj1pQSA9ALHme(~s^}Hu6orN0FWgTUc-((epvPeT
zu;i=K+rvdj3}u0ymh1Hfqq=X;D!S#QQ$MK07n=ny@s-m4wGQ<#?lC@Y%y-LqA=AjXHDzHZ46**9z9i{M_&Y+Rws@~;BfHzc(Q-lXq@ZC%S}Dyt0CO=5_HUhc|O
z*0u8KTp9tahClV^T%l43Akj20gX!w3Di#+_YXIKhM?;JnJ#G0`^l^CL5s6y&00Ai(
z6wJ@B;d}pHa}jC%o1+kpU4W01``rC#u<-DiFh=_xBI_6^{bBM&)$d!27TryUO=LnT
zQm2m^cZ3P2lMo{ihrP1cKW}}LfH9MQfFk3%|5A|kQh6$N4Le72ybxRAOVLZ!*bqie
zS~1BQy0!+_B?ARjh@65Wv$FA)2-yAfKmM6cKUU_yOW^;7;JF*~8oU@-vaM?!iG}!6
zeyAz<88g{o)Z=gHHO0A7GzSyFH0&y*k#h-ICOdn)@IbwBpWPN?N2%oz^<{E2!qWF`
zUE_j^aGUYF)GxV?UGSs?MNKZxPyHUHIKsN=9(^~j`pFuin!6fed!wX;`gi@~ROB7Y
zy1E;ByD!#T^gRE*S@)7d1efRaW|199fzP`HNP4k%`nW$8Z-#{}M6nOoJ@;m-y4HHN
zrl^c@o@Sh)D{do}9UzCm3{rnzb7;Z{r9Kmh^iEmv>nHLZ#n{rLHS=>jUHyZ5Lrf&`
z;$AT%OJYQ>(|Vn#1SkJi#{}98C&*ZSNt^mkXOrq=bSjH6eoJNLV}GF3bEKTWkde64zM&*vb=DvlLt2Smcz-IumEI}}
z(5yFGhsXvRsZ(_pbUSoe|JV~atP_$03%sF?A+Fn?>QQm4s@bM!bv*9-aSr5@>ypW%
z-jw05RTK@S@y+CtVm7^;_W_}5<&V3!dUD+>$tSfEa?ewbdTKr-@G#F?Fp(H0oqbWI
zReDN*G%SumS|v9AS+^|~n-QOl8kx-<*NA1r#PO_rSw)g%1FY$^$D3zq9Q|OO^fSRP79nh+>qc{9rfSkHADeet84)0sj-%-Zo1?+0sP@SQkB+0-sSz
zw`>l89plu3l==6C?)EjAO2HBFgV2{_aK0f
zs$3aw1p|icefN8t-nhJU8qEh4^adEjlXW)KqaJ7p*x`AC$)#HV2S=B~RXZrguDv=6
zpx#+$#e@aFh
z23N6{i*qqIcy_6rvMgwD>vQErtO6!#=WfoMLJRa;KE`WUxvEk}3LMkj`;OzZ9q2@5Uc|Cj?NiRN!x}5l)
zq;;%L*l1LoXc4xRiSBFI9?GHY7%8J6H2AlHOs6M(Bq@
zyn$D`Z$Vpa3p?yQl!Ss}eGaTxn6c4fsjcXAK)@ZHSJMo8l7AzevHb*?{o8e=UCjJQ
z8qcAkszRhul;fY!4{arp)|{FDLH!w`Eo&14_PRjOp5J)xJRE)8Vd>E7q;=C01s_fe
zMRF)<5
zC_Uw9ASkWx{D