From 502993751ee1e059953fbc6494fca1fd7647e9dd Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Tue, 8 Apr 2025 11:47:05 -0400 Subject: [PATCH 01/35] add update rate --- R/buildPedigree.R | 15 +- R/convertPedigree.R | 2 +- R/makeLinks.R | 217 ++++++++++++----------- R/readGedcom.R | 82 +++++---- data-raw/benchmark.R | 4 +- data-raw/df_inbreeding.R | 2 +- data-raw/df_potter.R | 8 +- man/collapseNames.Rd | 16 ++ man/readGedcom.Rd | 3 + tests/testthat/test-computeRelatedness.R | 13 +- tests/testthat/test-makeLinks.R | 3 +- tests/testthat/test-readPedigrees.R | 10 +- tests/testthat/test-summarizePedigrees.R | 24 ++- 13 files changed, 235 insertions(+), 164 deletions(-) create mode 100644 man/collapseNames.Rd diff --git a/R/buildPedigree.R b/R/buildPedigree.R index 3d94ecad..98dfbaa3 100644 --- a/R/buildPedigree.R +++ b/R/buildPedigree.R @@ -88,7 +88,8 @@ ped2graph <- function(ped, ...) { # Check ped/data.fram if (!inherits(ped, "data.frame")) { - stop("ped should be a data.frame or inherit to a data.frame")} + stop("ped should be a data.frame or inherit to a data.frame") + } # Handle adjacent argument adjacent <- match.arg(tolower(adjacent)[1], choices = c( @@ -182,8 +183,10 @@ ped2maternal <- function(ped, personID = "ID", momID = "momID", dadID = "dadID", matID = "matID", ...) { # Call to wrapper function - .ped2id(ped = ped, personID = personID, momID = momID, - dadID = dadID, famID = matID, type = "mothers") + .ped2id( + ped = ped, personID = personID, momID = momID, + dadID = dadID, famID = matID, type = "mothers" + ) } #' Add a paternal line ID variable to a pedigree @@ -203,6 +206,8 @@ ped2paternal <- function(ped, personID = "ID", momID = "momID", dadID = "dadID", patID = "patID", ...) { # Call to wrapper function - .ped2id(ped = ped, personID = personID, momID = momID, - dadID = dadID, famID = patID, type = "fathers") + .ped2id( + ped = ped, personID = personID, momID = momID, + dadID = dadID, famID = patID, type = "fathers" + ) } diff --git a/R/convertPedigree.R b/R/convertPedigree.R index 0f454acc..9cab93f0 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -224,7 +224,7 @@ ped2com <- function(ped, component, } else { # isChild is the 'S' matrix from RAM - isChild <- isChild(isChild_method=isChild_method, ped=ped) + isChild <- isChild(isChild_method = isChild_method, ped = ped) if (saveable) { saveRDS(isChild, file = checkpoint_files$isChild) diff --git a/R/makeLinks.R b/R/makeLinks.R index 96768fd5..d2dc6a65 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -50,23 +50,28 @@ com2links <- function( } # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. if (!is.null(ad_ped_matrix)) { - ad_ped_matrix <- validate_and_convert_matrix(mat=ad_ped_matrix, - name = "ad_ped_matrix") + ad_ped_matrix <- validate_and_convert_matrix( + mat = ad_ped_matrix, + name = "ad_ped_matrix" + ) } # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. if (!is.null(cn_ped_matrix)) { - cn_ped_matrix <- validate_and_convert_matrix(mat=cn_ped_matrix, - name="cn_ped_matrix", - ensure_symmetric = TRUE) + cn_ped_matrix <- validate_and_convert_matrix( + mat = cn_ped_matrix, + name = "cn_ped_matrix", + ensure_symmetric = TRUE + ) } # Validate and process mit_ped_matrix: convert and ensure binary values. if (!is.null(mit_ped_matrix)) { - - mit_ped_matrix <- validate_and_convert_matrix(mat=mit_ped_matrix, - name="mit_ped_matrix",force_binary = TRUE, - ensure_symmetric = TRUE) + mit_ped_matrix <- validate_and_convert_matrix( + mat = mit_ped_matrix, + name = "mit_ped_matrix", force_binary = TRUE, + ensure_symmetric = TRUE + ) } # --- Build IDs and Prepare Matrix Pointers --- @@ -516,109 +521,117 @@ com2links.legacy <- function( verbose = FALSE, outcome_name = "data", ...) { - # --- Legacy Mode --- - if (verbose) { - message("Using legacy mode") + # --- Legacy Mode --- + if (verbose) { + message("Using legacy mode") + } + # In legacy mode, convert matrices to the expected symmetric formats. + + # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata')) + # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix") + # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata')) + # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata')) + + # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv') + + biggestMtPed <- mit_ped_matrix + remove(mit_ped_matrix) + biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix") + remove(cn_ped_matrix) + biggestPed <- ad_ped_matrix + remove(ad_ped_matrix) + biggestMtPed@x[biggestMtPed@x > 0] <- 1 + + # Set the output file name. + if (exists("rel_pairs_file")) { + fname <- rel_pairs_file + } else { + fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv") + } + # Initialize the output file with headers. + ds <- data.frame( + ID1 = numeric(0), ID2 = numeric(0), + addRel = numeric(0), + mitRel = numeric(0), cnuRel = numeric(0) + ) + + utils::write.table(ds, + file = fname, sep = ",", + append = FALSE, row.names = FALSE + ) + + # Extract IDs from the common nuclear matrix. + ids <- as.numeric(dimnames(biggestCnPed)[[1]]) + + # Extract pointers from the legacy matrices. + newColPos1 <- biggestPed@p + 1L + iss1 <- biggestPed@i + 1L + newColPos2 <- biggestMtPed@p + 1L + iss2 <- biggestMtPed@i + 1L + newColPos3 <- biggestCnPed@p + 1L + iss3 <- biggestCnPed@i + 1L + nc <- ncol(biggestPed) + + # Process each individual. + for (j in 1L:nc) { + ID2 <- ids[j] + ncp1 <- newColPos1[j] + ncp1p <- newColPos1[j + 1L] + cond1 <- ncp1 < ncp1p + if (cond1) { + vv1 <- ncp1:(ncp1p - 1L) + iss1vv <- iss1[vv1] } - # In legacy mode, convert matrices to the expected symmetric formats. - - # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata')) - # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix") - # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata')) - # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata')) - - # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv') - - biggestMtPed <- mit_ped_matrix - remove(mit_ped_matrix) - biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix") - remove(cn_ped_matrix) - biggestPed <- ad_ped_matrix - remove(ad_ped_matrix) - biggestMtPed@x[biggestMtPed@x > 0] <- 1 - - # Set the output file name. - if (exists("rel_pairs_file")) { - fname <- rel_pairs_file - } else { - fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv") + ncp2 <- newColPos2[j] + ncp2p <- newColPos2[j + 1L] + cond2 <- ncp2 < ncp2p + if (cond2) { + vv2 <- ncp2:(ncp2p - 1L) + iss2vv <- iss2[vv2] + } + ncp3 <- newColPos3[j] + ncp3p <- newColPos3[j + 1L] + cond3 <- ncp3 < ncp3p + if (cond3) { + vv3 <- ncp3:(ncp3p - 1L) + iss3vv <- iss3[vv3] } - # Initialize the output file with headers. - ds <- data.frame(ID1 = numeric(0), ID2 = numeric(0), - addRel = numeric(0), - mitRel = numeric(0), cnuRel = numeric(0)) - - utils::write.table(ds, file = fname, sep = ",", - append = FALSE, row.names = FALSE) - - # Extract IDs from the common nuclear matrix. - ids <- as.numeric(dimnames(biggestCnPed)[[1]]) - - # Extract pointers from the legacy matrices. - newColPos1 <- biggestPed@p + 1L - iss1 <- biggestPed@i + 1L - newColPos2 <- biggestMtPed@p + 1L - iss2 <- biggestMtPed@i + 1L - newColPos3 <- biggestCnPed@p + 1L - iss3 <- biggestCnPed@i + 1L - nc <- ncol(biggestPed) - - # Process each individual. - for (j in 1L:nc) { - ID2 <- ids[j] - ncp1 <- newColPos1[j] - ncp1p <- newColPos1[j + 1L] - cond1 <- ncp1 < ncp1p + + # Merge indices from all three matrices. + u <- sort(igraph::union(igraph::union(if (cond1) { + iss1vv + }, if (cond2) { + iss2vv + }), if (cond3) { + iss3vv + })) + # browser() + if (cond1 || cond2 || cond3) { + ID1 <- ids[u] + tds <- data.frame( + ID1 = ID1, ID2 = ID2, + addRel = 0, mitRel = 0, cnuRel = 0 + ) if (cond1) { - vv1 <- ncp1:(ncp1p - 1L) - iss1vv <- iss1[vv1] + tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1] } - ncp2 <- newColPos2[j] - ncp2p <- newColPos2[j + 1L] - cond2 <- ncp2 < ncp2p if (cond2) { - vv2 <- ncp2:(ncp2p - 1L) - iss2vv <- iss2[vv2] + tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2] } - ncp3 <- newColPos3[j] - ncp3p <- newColPos3[j + 1L] - cond3 <- ncp3 < ncp3p if (cond3) { - vv3 <- ncp3:(ncp3p - 1L) - iss3vv <- iss3[vv3] - } - - # Merge indices from all three matrices. - u <- sort(igraph::union(igraph::union(if (cond1) { - iss1vv - }, if (cond2) { - iss2vv - }), if (cond3) { - iss3vv - })) - # browser() - if (cond1 || cond2 || cond3) { - ID1 <- ids[u] - tds <- data.frame(ID1 = ID1, ID2 = ID2, - addRel = 0, mitRel = 0, cnuRel = 0) - if (cond1) { - tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1] - } - if (cond2) { - tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2] - } - if (cond3) { - tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3] - } - utils::write.table(tds, file = fname, row.names = FALSE, - col.names = FALSE, append = TRUE, sep = ",") - } - if (!(j %% update_rate)) { - cat(paste0("Done with ", j, " of ", nc, "\n")) + tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3] } + utils::write.table(tds, + file = fname, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + } + if (!(j %% update_rate)) { + cat(paste0("Done with ", j, " of ", nc, "\n")) } - return(NULL) } + return(NULL) +} #' @title validate_and_convert_matrix #' @description diff --git a/R/readGedcom.R b/R/readGedcom.R index d6858e58..f9066c91 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -9,6 +9,7 @@ #' @param combine_cols A logical value indicating whether to combine columns with duplicate values. #' @param verbose A logical value indicating whether to print messages. #' @param skinny A logical value indicating whether to return a skinny data frame. +#' @param update_rate numeric. The rate at which to print progress #' @param ... Additional arguments to be passed to the function. #' @return A data frame containing information about individuals, with the following potential columns: #' - `id`: ID of the individual @@ -53,6 +54,7 @@ readGedcom <- function(file_path, remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, + update_rate = 1000, ...) { # Checks if (!file.exists(file_path)) stop("File does not exist: ", file_path) @@ -73,11 +75,15 @@ readGedcom <- function(file_path, identifiers = c("id", "momID", "dadID"), names = c( "name", "name_given", "name_given_pieces", - "name_surn", "name_surn_pieces", "name_marriedsurn", "name_nick", "name_npfx", "name_nsfx" + "name_surn", "name_surn_pieces", "name_marriedsurn", + "name_nick", "name_npfx", "name_nsfx" ), sex = c("sex"), birth = c("birth_date", "birth_lat", "birth_long", "birth_place"), - death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"), + death = c( + "death_caus", "death_date", + "death_lat", "death_long", "death_place" + ), attributes = c( "attribute_caste", "attribute_children", "attribute_description", "attribute_education", "attribute_idnumber", "attribute_marriages", @@ -91,7 +97,10 @@ readGedcom <- function(file_path, all_var_names <- unlist(var_names, use.names = FALSE) # Initialize all variables to NA - vars <- stats::setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) + vars <- stats::setNames( + as.list(rep(NA_character_, length(all_var_names))), + all_var_names + ) df_temp <- as.data.frame(matrix(nrow = 1, ncol = length(all_var_names))) names(df_temp) <- all_var_names @@ -107,7 +116,10 @@ readGedcom <- function(file_path, df_temp <- rbind(df_temp, line_to_write) # Reset all variables to NA - vars <- stats::setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) + vars <- stats::setNames(as.list(rep( + NA_character_, + length(all_var_names) + )), all_var_names) vars$id <- stringr::str_extract(tmpv, "(?<=@.)\\d*(?=@)") next @@ -269,7 +281,7 @@ readGedcom <- function(file_path, vars <- result$vars if (result$matched) next - if (verbose && i %% 1000 == 0) { + if (verbose && i %% update_rate == 0) { cat("Processed", i, "lines\n") } } @@ -296,32 +308,8 @@ readGedcom <- function(file_path, df_temp <- processParents(df_temp, datasource = "gedcom") } - - if (combine_cols) { - if (verbose) { - print("Combining Duplicate Columns") - } - # need to check if any values aren't NA in name_given_pieces and name_surn_pieces - # Combine `name_given` and `name_given_pieces` - - # Combine `name_given` and `name_given_pieces` - if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) { - result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces) - df_temp$name_given <- result$combined - if (!result$retain_col2) { - df_temp$name_given_pieces <- NULL - } - } - - # Combine `name_surn` and `name_surn_pieces` - if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) { - result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces) - df_temp$name_surn <- result$combined - if (!result$retain_col2) { - df_temp$name_surn_pieces <- NULL - } - } + df_temp <- collapseNames(verbose = verbose, df_temp = df_temp) } if (remove_empty_cols) { @@ -565,3 +553,37 @@ process_tag <- function(tag, field_name, pattern_rows, line, vars, } return(list(vars = vars, matched = matched)) } + +#' collapse Names +#' +#' This function combines the `name_given` and `name_given_pieces` columns in a data frame. +#' +#' @inheritParams readGedcom +#' @param df_temp A data frame containing the columns to be combined. + +collapseNames <- function(verbose, df_temp) { + if (verbose) { + print("Combining Duplicate Columns") + } + # need to check if any values aren't NA in name_given_pieces and name_surn_pieces + # Combine `name_given` and `name_given_pieces` + + # Combine `name_given` and `name_given_pieces` + if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) { + result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces) + df_temp$name_given <- result$combined + if (!result$retain_col2) { + df_temp$name_given_pieces <- NULL + } + } + + # Combine `name_surn` and `name_surn_pieces` + if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) { + result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces) + df_temp$name_surn <- result$combined + if (!result$retain_col2) { + df_temp$name_surn_pieces <- NULL + } + } + return(df_temp) +} diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R index 8c4c90b5..fe480cdf 100644 --- a/data-raw/benchmark.R +++ b/data-raw/benchmark.R @@ -60,4 +60,6 @@ print(benchmark_results) # Optional: Save results to CSV for later analysis write.csv(summary(benchmark_results), - "benchmark_results.csv", row.names = FALSE) + "benchmark_results.csv", + row.names = FALSE +) diff --git a/data-raw/df_inbreeding.R b/data-raw/df_inbreeding.R index 9af10ff0..430830ff 100644 --- a/data-raw/df_inbreeding.R +++ b/data-raw/df_inbreeding.R @@ -8,5 +8,5 @@ inbreeding <- raw ## # data processing -#write.csv(inbreeding, "data-raw/inbreeding.csv", row.names = FALSE) +# write.csv(inbreeding, "data-raw/inbreeding.csv", row.names = FALSE) usethis::use_data(inbreeding, overwrite = TRUE, compress = "xz") diff --git a/data-raw/df_potter.R b/data-raw/df_potter.R index aa039c81..d1f5faf3 100644 --- a/data-raw/df_potter.R +++ b/data-raw/df_potter.R @@ -45,9 +45,11 @@ potter <- data.frame( "Molly Weasley", "Lucy Weasley" ), - gen = c(1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3), + gen = c( + 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3 + ), momID = c( 101, 101, 103, 103, NA, 3, 4, 10, NA, NA, 10, 10, 10, 10, 10, 10, NA, 105, 105, NA, diff --git a/man/collapseNames.Rd b/man/collapseNames.Rd new file mode 100644 index 00000000..01a38a6c --- /dev/null +++ b/man/collapseNames.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{collapseNames} +\alias{collapseNames} +\title{collapse Names} +\usage{ +collapseNames(verbose, df_temp) +} +\arguments{ +\item{verbose}{A logical value indicating whether to print messages.} + +\item{df_temp}{A data frame containing the columns to be combined.} +} +\description{ +This function combines the `name_given` and `name_given_pieces` columns in a data frame. +} diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index 78d45773..fdb158e1 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -11,6 +11,7 @@ readGedcom( remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, + update_rate = 1000, ... ) } @@ -27,6 +28,8 @@ readGedcom( \item{skinny}{A logical value indicating whether to return a skinny data frame.} +\item{update_rate}{numeric. The rate at which to print progress} + \item{...}{Additional arguments to be passed to the function.} } \value{ diff --git a/tests/testthat/test-computeRelatedness.R b/tests/testthat/test-computeRelatedness.R index 05fb7e62..192df30b 100644 --- a/tests/testthat/test-computeRelatedness.R +++ b/tests/testthat/test-computeRelatedness.R @@ -28,11 +28,13 @@ test_that("calculateRelatedness function with empirical", { # Test 7: empirical divide by zero test_that("calculateH handles divide by zero for empirical", { -expect_error( - calculateRelatedness(generations = 2, - empirical = TRUE, total_a = 0, - total_m = 0)) - + expect_error( + calculateRelatedness( + generations = 2, + empirical = TRUE, total_a = 0, + total_m = 0 + ) + ) }) test_that("inferRelatedness performs as expected", { result <- inferRelatedness(0, aceA = .9, aceC = 0, sharedC = 0) @@ -105,4 +107,3 @@ test_that("calculateH stops for illegal coefficients", { "The observed correlations should be between -1 and 1" ) }) - diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index e8dfe99e..475f7b6b 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -131,7 +131,8 @@ test_that("com2links correctly handles missing matrices", { expect_error( com2links(ad_ped_matrix = NULL, mit_ped_matrix = NULL, cn_ped_matrix = NULL), - "At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided." ) + "At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided." + ) expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.") }) diff --git a/tests/testthat/test-readPedigrees.R b/tests/testthat/test-readPedigrees.R index 1f098697..a6d6bdd0 100644 --- a/tests/testthat/test-readPedigrees.R +++ b/tests/testthat/test-readPedigrees.R @@ -214,10 +214,10 @@ test_that("readWikifamilytree reads a string correctly", { # read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt -#test_that("readWikifamilytree reads a file correctly", { - # Create a temporary WikiFamilyTree file for testing - # Example usage +# test_that("readWikifamilytree reads a file correctly", { +# Create a temporary WikiFamilyTree file for testing +# Example usage # family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc") - # result <- readWikifamilytree(file_path=family_tree_file_path) -#}) +# result <- readWikifamilytree(file_path=family_tree_file_path) +# }) diff --git a/tests/testthat/test-summarizePedigrees.R b/tests/testthat/test-summarizePedigrees.R index 61dae665..5bc1d6ea 100644 --- a/tests/testthat/test-summarizePedigrees.R +++ b/tests/testthat/test-summarizePedigrees.R @@ -53,9 +53,11 @@ test_that("summarizeMatrilines() works", { nbiggest <- 2 df <- ped2fam(potter, famID = "newFamID", personID = "personID") %>% ped2maternal(personID = "personID") - df_summarized <- summarizeMatrilines(df, famID = "newFamID", - personID = "personID", - nbiggest = nbiggest) + df_summarized <- summarizeMatrilines(df, + famID = "newFamID", + personID = "personID", + nbiggest = nbiggest + ) # is the total count from the family summary the same as the raw data? result_observed <- sum(df_summarized$maternal_summary$count) result_expected <- nrow(potter) @@ -76,9 +78,11 @@ test_that("summarizePatrilines() works", { nbiggest <- 4 df <- ped2fam(potter, famID = "newFamID", personID = "personID") %>% ped2paternal(personID = "personID") - df_summarized <- summarizePatrilines(df, famID = "newFamID", - personID = "personID", - nbiggest = nbiggest) + df_summarized <- summarizePatrilines(df, + famID = "newFamID", + personID = "personID", + nbiggest = nbiggest + ) # is the total count from the family summary the same as the raw data? result_observed <- sum(df_summarized$paternal_summary$count) result_expected <- nrow(potter) @@ -111,7 +115,7 @@ test_that("summarizePedigrees() handles missing values correctly", { }) # Test Case 7: When all variables are skipped -test_that("summarizePedigrees works when all numeric variables are skipped",{ +test_that("summarizePedigrees works when all numeric variables are skipped", { df <- data.frame( ID = 1:5, momID = c(NA, 1, 1, NA, 4), @@ -126,8 +130,10 @@ test_that("summarizePedigrees works when all numeric variables are skipped",{ # Test Case 8: Handling invalid column names test_that("summarizePedigrees() throws error on invalid column names", { - df <- data.frame(ID = 1:5, momID = c(NA, 1, 1, NA, 4), - dadID = c(NA, 2, 2, NA, 5), famID = c(1, 1, 1, 2, 2)) + df <- data.frame( + ID = 1:5, momID = c(NA, 1, 1, NA, 4), + dadID = c(NA, 2, 2, NA, 5), famID = c(1, 1, 1, 2, 2) + ) expect_error(summarizePedigrees(df, byr = "unknown_column")) }) From e22c933ea0dcfb8053e4c434d03f9cf4f12163c8 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Tue, 8 Apr 2025 13:53:02 -0400 Subject: [PATCH 02/35] allow verbose argument to be passed to standardizeColnames --- NEWS.md | 1 + R/checkIDs.R | 2 +- R/checkParents.R | 47 ++++++++++++++++++++++++++++++++++++++++++++++- R/checkSex.R | 2 +- R/cleanPedigree.R | 2 +- R/plotPedigree.R | 2 +- R/tweakPedigree.R | 4 ++-- 7 files changed, 53 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index d8bd3dd0..c5392a9d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * added tests for checkParents function * added GoT analysis * reduced complexity of com2links and summarizePedigree with the use of subfunctions +* allow verbose argument to be passed to standardizeColnames # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/checkIDs.R b/R/checkIDs.R index b9da9a6d..d528d56a 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -17,7 +17,7 @@ #' @export checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { # Standardize column names in the input dataframe - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) # Initialize a list to store validation results validation_results <- list() diff --git a/R/checkParents.R b/R/checkParents.R index eab8e0ca..c37a08e9 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -26,7 +26,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, parentswithoutrow = repair) { # Standardize column names in the input dataframe ped_og <- ped - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) # Initialize a list to store validation results @@ -300,3 +300,48 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, repairParentIDs <- function(ped, verbose = FALSE) { checkParentIDs(ped = ped, verbose = verbose, repair = TRUE) } + +#' Add Phantom Parents +#' +#' This function adds phantom parents to a pedigree. +#' @inheritParams checkParentIDs + +addPhantoms <- function(ped, verbose, pid, validation_results) { + # Add parents who appear in momID or dadID but are missing from ID + new_entries <- data.frame() + + listed_parents <- unique(c(ped$momID, ped$dadID)) + listed_parents <- listed_parents[!is.na(listed_parents)] + + existing_ids <- ped$ID + missing_parents <- setdiff(listed_parents, existing_ids) + + if (length(missing_parents) > 0) { + if (verbose) { + cat("Adding parents who were listed in momID/dadID but missing from ID:\n") + print(missing_parents) + } + + for (pid in missing_parents) { + role <- unique( + c( + if (pid %in% ped$momID) "mom" else NULL, + if (pid %in% ped$dadID) "dad" else NULL + ) + ) + inferred_sex <- if ("mom" %in% role) validation_results$female_var else validation_results$male_var + + new_row <- ped[1, ] + new_row$ID <- pid + new_row$dadID <- NA + new_row$momID <- NA + new_row$sex <- inferred_sex + new_entries <- rbind(new_entries, new_row) + } + } + ped <- merge(ped, new_entries, all = TRUE) + if (verbose) { + cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n") + } + return(ped) +} diff --git a/R/checkSex.R b/R/checkSex.R index d220dec6..8fbabf6a 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -37,7 +37,7 @@ #' checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE) { # Standardize column names in the input dataframe - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) # TO DO: bypass the rest of the function if recode_only is TRUE diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index ffd8054e..b03ca270 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -60,7 +60,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # check_sex = TRUE, # check_parents = TRUE, # verbose = FALSE) { -# corrected_ped <- ped <- standardizeColnames(ped) +# corrected_ped <- ped <- standardizeColnames(ped, verbose = verbose) # if (verbose) { # print("Repairing pedigree...") # } diff --git a/R/plotPedigree.R b/R/plotPedigree.R index 051610e6..5b2229cd 100644 --- a/R/plotPedigree.R +++ b/R/plotPedigree.R @@ -25,7 +25,7 @@ plotPedigree <- function(ped, pconnect = .5, ...) { # Standardize column names in the input dataframe - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) # Define required columns simulated_vars <- c("fam", "ID", "dadID", "momID", "sex") diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R index 2367fe21..edefaf99 100644 --- a/R/tweakPedigree.R +++ b/R/tweakPedigree.R @@ -18,7 +18,7 @@ makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_t "fam", "ID", "gen", "dadID", "momID", "spID", "sex" ), collapse = "")) { - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) if (verbose) { cat("The input pedigree is not in the same format as the output of simulatePedigree\n") } @@ -131,7 +131,7 @@ makeInbreeding <- function(ped, c("fam", "ID", "gen", "dadID", "momID", "spID", "sex"), collapse = "" )) { - ped <- standardizeColnames(ped) + ped <- standardizeColnames(ped, verbose = verbose) if (verbose) { cat("The input pedigree is not in the same format as the output of simulatePedigree\n") } From a8c2b6c8781bcf4a37858c12660247dd221a1233 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Tue, 8 Apr 2025 14:54:40 -0400 Subject: [PATCH 03/35] refactor checkids --- R/checkIDs.R | 151 ++++++++++++++++++++---------------------- R/summarizePedigree.R | 62 ++++++++++------- 2 files changed, 109 insertions(+), 104 deletions(-) diff --git a/R/checkIDs.R b/R/checkIDs.R index d528d56a..c92653ed 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -28,91 +28,15 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { } # Identify non-unique IDs - duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)] - + id_check <- checkIDuniqueness(ped=ped, verbose=verbose) - # Update the validation_results list - if (length(duplicated_ids) > 0) { - if (verbose) { - cat(paste0(length(duplicated_ids), " non-unique IDs found.\n")) - } - validation_results$all_unique_ids <- FALSE - validation_results$total_non_unique_ids <- length(duplicated_ids) - validation_results$non_unique_ids <- unique(duplicated_ids) - } else { - if (verbose) { - cat("All IDs are unique.\n") - } - validation_results$all_unique_ids <- TRUE - validation_results$total_non_unique_ids <- 0 - validation_results$non_unique_ids <- NULL - } if (verbose) { cat("Step 2: Checking for within row duplicats...\n") - cat("Is own father?\n") - } - is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)] - if (verbose) { - cat("Is own mother?\n") - } - is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)] - if (verbose) { - cat("Is mother father?\n") } - duplicated_parents <- ped$ID[ped$dadID == ped$momID & !is.na(ped$dadID) & !is.na(ped$momID)] + row_check <- checkWithinRowDuplicates(ped=ped, verbose = verbose) - # get the total number of within row duplicates - validation_results$total_own_father <- length(is_own_father) - validation_results$total_own_mother <- length(is_own_mother) - validation_results$total_duplicated_parents <- length(duplicated_parents) - validation_results$total_within_row_duplicates <- sum(length(is_own_father), length(is_own_mother), length(duplicated_parents)) - # Update the validation_results list + validation_results <- c(id_check, row_check) - if (validation_results$total_within_row_duplicates > 0) { - if (verbose) { - cat(paste0( - validation_results$total_within_row_duplicates, - " within row duplicates found.\n" - )) - } - validation_results$within_row_duplicates <- TRUE - if (validation_results$total_own_father > 0) { - validation_results$is_own_father_ids <- unique(is_own_father) - if (verbose) { - cat(paste0( - validation_results$total_own_father, - " individuals are their own fathers.\n" - )) - } - } - if (validation_results$total_own_mother > 0) { - validation_results$is_own_mother_ids <- unique(is_own_mother) - if (verbose) { - cat(paste0( - validation_results$total_own_mother, - " individuals are their own mothers.\n" - )) - } - } - if (validation_results$total_duplicated_parents > 0) { - validation_results$duplicated_parents_ids <- unique(duplicated_parents) - if (verbose) { - cat(paste0( - validation_results$total_duplicated_parents, - " individuals have the same mother and father.\n" - )) - } - } - } else { - if (verbose) { - cat("No within row duplicates found.\n") - } - validation_results$within_row_duplicates <- FALSE - validation_results$total_within_row_duplicates <- 0 - validation_results$is_own_father_ids <- NULL - validation_results$is_own_mother_ids <- NULL - validation_results$duplicated_parents_ids <- NULL - } if (verbose) { cat("Validation Results:\n") print(validation_results) @@ -169,3 +93,72 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { repairIDs <- function(ped, verbose = FALSE) { checkIDs(ped = ped, verbose = verbose, repair = TRUE) } + +#' Check for duplicated individual IDs +#' +#' This function checks for duplicated individual IDs in a pedigree. +#' +#' @param ped A pedigree object +#' @param verbose A logical indicating whether to print progress messages +#' @return A list containing the results of the check +#' +checkIDuniqueness <- function(ped, verbose = FALSE) { + # Identify non-unique IDs + + duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)] + + if (verbose) { + if (length(duplicated_ids) > 0) { + cat(length(duplicated_ids), " non-unique IDs found.\n") + } else { + cat("All IDs are unique.\n") + } + } + + # Update the validation_results list + list( + all_unique_ids = length(duplicated_ids) == 0, + total_non_unique_ids = length(duplicated_ids), + non_unique_ids = if (length(duplicated_ids) > 0) unique(duplicated_ids) else NULL + ) +} + + +#' Check for within-row duplicates (self-parents, same mom/dad) +checkWithinRowDuplicates <- function(ped, verbose = FALSE) { + # is the individual their own father or mother? + is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)] + is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)] + + # is mother and father the same? + duplicated_parents <- ped$ID[ + ped$dadID == ped$momID & + !is.na(ped$dadID) & !is.na(ped$momID) + ] + + # get the total number of within row duplicates + total <- length(is_own_father) + length(is_own_mother) + length(duplicated_parents) + + if (verbose) { + if (total > 0) { + cat(total, " within row duplicates found.\n") + if (length(is_own_father) > 0) cat(length(is_own_father), " individuals are their own fathers.\n") + if (length(is_own_mother) > 0) cat(length(is_own_mother), " individuals are their own mothers.\n") + if (length(duplicated_parents) > 0) cat(length(duplicated_parents), " individuals have the same mother and father.\n") + } else { + cat("No within row duplicates found.\n") + } + } + # Update the validation_results list + list( + total_own_father = length(is_own_father), + total_own_mother = length(is_own_mother), + total_duplicated_parents = length(duplicated_parents), + total_within_row_duplicates = total, + within_row_duplicates = total > 0, + is_own_father_ids = if (length(is_own_father) > 0) unique(is_own_father) else NULL, + is_own_mother_ids = if (length(is_own_mother) > 0) unique(is_own_mother) else NULL, + duplicated_parents_ids = if (length(duplicated_parents) > 0) unique(duplicated_parents) else NULL + ) +} + diff --git a/R/summarizePedigree.R b/R/summarizePedigree.R index 6c6564f3..ea8f81e8 100644 --- a/R/summarizePedigree.R +++ b/R/summarizePedigree.R @@ -39,7 +39,7 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID", matID = "matID", patID = "patID", type = c("fathers", "mothers", "families"), byr = NULL, include_founder = FALSE, founder_sort_var = NULL, - nbiggest = 5, noldest = 5, skip_var = NULL, + nbiggest = 5, noldest = nbiggest, skip_var = NULL, five_num_summary = FALSE, network_checks = FALSE, verbose = FALSE) { # Fast Fails @@ -123,15 +123,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID", ) # Find the originating member for each line if (include_founder) { - if (verbose) message("Finding originating members for families...") - originating_member_family <- findFounder(ped_dt, + family_summary_dt <- summarizeFounder( + verbose = verbose, ped_dt = ped_dt, group_var = famID, - sort_var = founder_sort_var - ) - # Merge summary statistics with originating members for additional information - family_summary_dt <- merge(family_summary_dt, - originating_member_family, - by = famID, suffixes = c("", "_founder") + sort_var = founder_sort_var, + foo_summary_dt = family_summary_dt ) } output$family_summary <- family_summary_dt @@ -146,14 +142,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID", five_num_summary = five_num_summary ) if (include_founder) { - if (verbose) message("Finding originating members for matrilineal lines...") - originating_member_maternal <- findFounder(ped_dt, + maternal_summary_dt <- summarizeFounder( + verbose = verbose, ped_dt = ped_dt, group_var = matID, - sort_var = founder_sort_var - ) - maternal_summary_dt <- merge(maternal_summary_dt, - originating_member_maternal, - by = matID, suffixes = c("", "_founder") + sort_var = founder_sort_var, + foo_summary_dt = maternal_summary_dt ) } output$maternal_summary <- maternal_summary_dt @@ -167,14 +160,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID", five_num_summary = five_num_summary ) if (include_founder) { - if (verbose) message("Finding originating members for patrilineal lines...") - originating_member_paternal <- findFounder(ped_dt, + paternal_summary_dt <- summarizeFounder( + verbose = verbose, ped_dt = ped_dt, group_var = patID, - sort_var = founder_sort_var - ) - paternal_summary_dt <- merge(paternal_summary_dt, - originating_member_paternal, - by = patID, suffixes = c("", "_founder") + sort_var = founder_sort_var, + foo_summary_dt = paternal_summary_dt ) } @@ -298,8 +288,8 @@ calculateSummaryDT <- function(data, group_var, skip_var, return(summary_stats) } -# Function to find the originating member for each line - +#' Function to find the originating member for each line +#' #' This function finds the originating member for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function. #' @inheritParams summarizePedigrees #' @param sort_var A character string specifying the column name to sort by. @@ -312,7 +302,29 @@ findFounder <- function(data, group_var, sort_var) { data[order(get(sort_var)), .SD[1], by = group_var] } +#' Function to summarize the originating members for each line +#' +#' This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function. +#' +#' @inheritParams summarizePedigrees +#' @inheritParams findFounder +#' +#' @keywords internal +summarizeFounder <- function(ped_dt, group_var, sort_var, foo_summary_dt, verbose) { + if (verbose) message(paste0("Finding originating members for ", "group_var")) + originating_member_foo <- findFounder( + data = ped_dt, + group_var = group_var, + sort_var = sort_var + ) + # Merge summary statistics with originating members for additional information + foo_summary_dt <- merge(foo_summary_dt, + originating_member_foo, + by = group_var, suffixes = c("", "_founder") + ) + return(foo_summary_dt) +} #' Summarize the maternal lines in a pedigree #' @inheritParams summarizePedigrees #' @seealso [summarizePedigrees ()] From 3326f757fbe4fa9e5731fa74972096a1e842842c Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Tue, 8 Apr 2025 14:57:25 -0400 Subject: [PATCH 04/35] docs --- NEWS.md | 3 ++- man/addPhantoms.Rd | 16 ++++++++++++++++ man/checkIDuniqueness.Rd | 19 +++++++++++++++++++ man/checkWithinRowDuplicates.Rd | 11 +++++++++++ man/findFounder.Rd | 2 +- man/summarizeFounder.Rd | 17 +++++++++++++++++ man/summarizePedigrees.Rd | 2 +- 7 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 man/addPhantoms.Rd create mode 100644 man/checkIDuniqueness.Rd create mode 100644 man/checkWithinRowDuplicates.Rd create mode 100644 man/summarizeFounder.Rd diff --git a/NEWS.md b/NEWS.md index c5392a9d..31ad40c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,9 +2,10 @@ * revived checkParents function to check for handling phantom parents and missing parents * added tests for checkParents function * added GoT analysis -* reduced complexity of com2links and summarizePedigree with the use of subfunctions +* reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions * allow verbose argument to be passed to standardizeColnames + # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/man/addPhantoms.Rd b/man/addPhantoms.Rd new file mode 100644 index 00000000..2981b01e --- /dev/null +++ b/man/addPhantoms.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkParents.R +\name{addPhantoms} +\alias{addPhantoms} +\title{Add Phantom Parents} +\usage{ +addPhantoms(ped, verbose, pid, validation_results) +} +\arguments{ +\item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.} + +\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} +} +\description{ +This function adds phantom parents to a pedigree. +} diff --git a/man/checkIDuniqueness.Rd b/man/checkIDuniqueness.Rd new file mode 100644 index 00000000..88b4dba3 --- /dev/null +++ b/man/checkIDuniqueness.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkIDs.R +\name{checkIDuniqueness} +\alias{checkIDuniqueness} +\title{Check for duplicated individual IDs} +\usage{ +checkIDuniqueness(ped, verbose = FALSE) +} +\arguments{ +\item{ped}{A pedigree object} + +\item{verbose}{A logical indicating whether to print progress messages} +} +\value{ +A list containing the results of the check +} +\description{ +This function checks for duplicated individual IDs in a pedigree. +} diff --git a/man/checkWithinRowDuplicates.Rd b/man/checkWithinRowDuplicates.Rd new file mode 100644 index 00000000..e0ddb318 --- /dev/null +++ b/man/checkWithinRowDuplicates.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkIDs.R +\name{checkWithinRowDuplicates} +\alias{checkWithinRowDuplicates} +\title{Check for within-row duplicates (self-parents, same mom/dad)} +\usage{ +checkWithinRowDuplicates(ped, verbose = FALSE) +} +\description{ +Check for within-row duplicates (self-parents, same mom/dad) +} diff --git a/man/findFounder.Rd b/man/findFounder.Rd index a4cf70bb..19bfcb1f 100644 --- a/man/findFounder.Rd +++ b/man/findFounder.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/summarizePedigree.R \name{findFounder} \alias{findFounder} -\title{This function finds the originating member for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function.} +\title{Function to find the originating member for each line} \usage{ findFounder(data, group_var, sort_var) } diff --git a/man/summarizeFounder.Rd b/man/summarizeFounder.Rd new file mode 100644 index 00000000..703834e3 --- /dev/null +++ b/man/summarizeFounder.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarizePedigree.R +\name{summarizeFounder} +\alias{summarizeFounder} +\title{Function to summarize the originating members for each line} +\usage{ +summarizeFounder(ped_dt, group_var, sort_var, foo_summary_dt, verbose) +} +\arguments{ +\item{sort_var}{A character string specifying the column name to sort by.} + +\item{verbose}{Logical, if TRUE, print progress messages.} +} +\description{ +This function summarizes the originating members for each line in a pedigree. It is supposed to be used internally by the \code{summarize_pedigree} function. +} +\keyword{internal} diff --git a/man/summarizePedigrees.Rd b/man/summarizePedigrees.Rd index b298d326..3ed2f0a5 100644 --- a/man/summarizePedigrees.Rd +++ b/man/summarizePedigrees.Rd @@ -17,7 +17,7 @@ summarizePedigrees( include_founder = FALSE, founder_sort_var = NULL, nbiggest = 5, - noldest = 5, + noldest = nbiggest, skip_var = NULL, five_num_summary = FALSE, network_checks = FALSE, From 6019916322c00d8227bfbdffada562fe09cadea7 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 9 Apr 2025 14:37:32 -0400 Subject: [PATCH 05/35] documentation --- NEWS.md | 2 +- R/checkIDs.R | 13 +++++-- R/checkParents.R | 3 +- R/computeRelatedness.R | 16 ++++++++ R/helpGeneric.R | 68 +-------------------------------- R/simulatePedigree.R | 8 ++++ man/SimPed.Rd | 33 ---------------- man/addPhantoms.Rd | 4 +- man/calculateRelatedness.Rd | 3 ++ man/checkIDuniqueness.Rd | 4 +- man/checkWithinRowDuplicates.Rd | 10 ++++- man/inferRelatedness.Rd | 5 +++ man/related_coef.Rd | 33 ---------------- man/relatedness.Rd | 33 ---------------- man/repairIDs.Rd | 4 +- man/simulatePedigree.Rd | 5 +++ 16 files changed, 67 insertions(+), 177 deletions(-) delete mode 100644 man/SimPed.Rd delete mode 100644 man/related_coef.Rd delete mode 100644 man/relatedness.Rd diff --git a/NEWS.md b/NEWS.md index 31ad40c0..94602bd7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * added GoT analysis * reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions * allow verbose argument to be passed to standardizeColnames - +* list SimPed and related_coef as aliases for functions # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/checkIDs.R b/R/checkIDs.R index c92653ed..4c1100fd 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -87,8 +87,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { #' Repair Missing IDs #' #' This function repairs missing IDs in a pedigree. -#' @param ped A pedigree object -#' @param verbose A logical indicating whether to print progress messages +#' @inheritParams checkIDs #' @return A corrected pedigree repairIDs <- function(ped, verbose = FALSE) { checkIDs(ped = ped, verbose = verbose, repair = TRUE) @@ -98,8 +97,7 @@ repairIDs <- function(ped, verbose = FALSE) { #' #' This function checks for duplicated individual IDs in a pedigree. #' -#' @param ped A pedigree object -#' @param verbose A logical indicating whether to print progress messages +#' @inheritParams checkIDs #' @return A list containing the results of the check #' checkIDuniqueness <- function(ped, verbose = FALSE) { @@ -124,7 +122,14 @@ checkIDuniqueness <- function(ped, verbose = FALSE) { } + #' Check for within-row duplicates (self-parents, same mom/dad) +#' +#' This function checks for within-row duplicates in a pedigree. +#' +#' @inheritParams checkIDs +#' @return A list containing the results of the check +#' checkWithinRowDuplicates <- function(ped, verbose = FALSE) { # is the individual their own father or mother? is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)] diff --git a/R/checkParents.R b/R/checkParents.R index c37a08e9..3c680e77 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -305,8 +305,9 @@ repairParentIDs <- function(ped, verbose = FALSE) { #' #' This function adds phantom parents to a pedigree. #' @inheritParams checkParentIDs +#' @param validation_results validation results -addPhantoms <- function(ped, verbose, pid, validation_results) { +addPhantoms <- function(ped, verbose, validation_results) { # Add parents who appear in momID or dadID but are missing from ID new_entries <- data.frame() diff --git a/R/computeRelatedness.R b/R/computeRelatedness.R index e3f53b8b..3fa91ffd 100644 --- a/R/computeRelatedness.R +++ b/R/computeRelatedness.R @@ -64,6 +64,14 @@ calculateRelatedness <- function( return(coef) } + +#' @rdname calculateRelatedness +#' @export +related_coef <- function(...) { + warning("The 'related_coef' function is deprecated. Please use 'calculateRelatedness' instead.") + calculateRelatedness(...) +} + #' Infer Relatedness Coefficient #' #' @description @@ -79,6 +87,7 @@ calculateRelatedness <- function( #' @param aceA Numeric. Proportion of variance attributable to additive genetic variance. Must be between 0 and 1. Default is 0.9. #' @param aceC Numeric. Proportion of variance attributable to shared environmental variance. Must be between 0 and 1. Default is 0. #' @param sharedC Numeric. Proportion of shared environment shared between the two individuals. Must be between 0 (no shared environment) and 1 (completely shared environment). Default is 0. +#' @param ... Further named arguments that may be passed to another function. #' #' @return #' Numeric. The calculated relatedness coefficient (`est_r`). @@ -97,6 +106,13 @@ inferRelatedness <- function(obsR, aceA = .9, aceC = 0, sharedC = 0) { return(calc_r) } +#' @rdname inferRelatedness +#' @export +relatedness <- function(...) { + warning("The 'relatedness' function is deprecated. Please use 'inferRelatedness' instead.") + inferRelatedness(...) +} + #' Falconer's Formula #' #' @description diff --git a/R/helpGeneric.R b/R/helpGeneric.R index e8ef381c..8b376bfb 100644 --- a/R/helpGeneric.R +++ b/R/helpGeneric.R @@ -99,71 +99,7 @@ resample <- function(x, ...) { } -#' SimPed (Deprecated) -#' -#' This function is a wrapper around the new `simulatePedigree` function. -#' `SimPed` has been deprecated, and it's advised to use `simulatePedigree` directly. -#' -#' @param ... Arguments to be passed to `simulatePedigree`. -#' @return The same result as calling `simulatePedigree`. -#' @seealso \code{\link{simulatePedigree}} for the updated function. -#' @description When calling this function, a warning will be issued about its deprecation. -#' @keywords deprecated -#' @examples -#' \dontrun{ -#' # This is an example of the deprecated function: -#' SimPed(...) -#' # It is recommended to use: -#' simulatePedigree(...) -#' } -#' @export -SimPed <- function(...) { # nolint: object_name_linter. - warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.") - simulatePedigree(...) -} -#' related_coef (Deprecated) -#' -#' This function is a wrapper around the new `calculateRelatedness` function. -#' `related_coef` has been deprecated, and it's advised to use `calculateRelatedness` directly. -#' -#' @param ... Arguments to be passed to `calculateRelatedness`. -#' @return The same result as calling `calculateRelatedness`. -#' @seealso \code{\link{calculateRelatedness}} for the updated function. -#' @description When calling this function, a warning will be issued about its deprecation. -#' @keywords deprecated -#' @examples -#' \dontrun{ -#' # This is an example of the deprecated function: -#' related_coef(...) -#' # It is recommended to use: -#' calculateRelatedness(...) -#' } -#' @export -related_coef <- function(...) { - warning("The 'related_coef' function is deprecated. Please use 'calculateRelatedness' instead.") - calculateRelatedness(...) -} -#' relatedness (Deprecated) -#' -#' This function is a wrapper around the new `inferRelatedness` function. -#' `relatedness` has been deprecated, and it's advised to use `inferRelatedness` directly. -#' -#' @param ... Arguments to be passed to `inferRelatedness`. -#' @return The same result as calling `inferRelatedness`. -#' @seealso \code{\link{inferRelatedness}} for the updated function. -#' @description When calling this function, a warning will be issued about its deprecation. -#' @keywords deprecated -#' @examples -#' \dontrun{ -#' # This is an example of the deprecated function: -#' relatedness(...) -#' # It is recommended to use: -#' inferRelatedness(...) -#' } -#' @export -relatedness <- function(...) { - warning("The 'relatedness' function is deprecated. Please use 'inferRelatedness' instead.") - inferRelatedness(...) -} + + diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index d50b7732..29407af8 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -343,6 +343,7 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, #' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version. #' @param balancedMar Not fully developed yet. Always \code{TRUE} in the current version. #' @param verbose logical If TRUE, print progress through stages of algorithm +#' @param ... Additional arguments to be passed to other functions. #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows: #' \itemize{ @@ -408,3 +409,10 @@ simulatePedigree <- function(kpc = 3, # print(df_Fam) return(df_Fam) } + +#' @rdname simulatePedigree +#' @export +SimPed <- function(...) { # nolint: object_name_linter. + warning("The 'SimPed' function is deprecated. Please use 'simulatePedigree' instead.") + simulatePedigree(...) +} diff --git a/man/SimPed.Rd b/man/SimPed.Rd deleted file mode 100644 index 79dd1f96..00000000 --- a/man/SimPed.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpGeneric.R -\name{SimPed} -\alias{SimPed} -\title{SimPed (Deprecated)} -\usage{ -SimPed(...) -} -\arguments{ -\item{...}{Arguments to be passed to `simulatePedigree`.} -} -\value{ -The same result as calling `simulatePedigree`. -} -\description{ -When calling this function, a warning will be issued about its deprecation. -} -\details{ -This function is a wrapper around the new `simulatePedigree` function. -`SimPed` has been deprecated, and it's advised to use `simulatePedigree` directly. -} -\examples{ -\dontrun{ -# This is an example of the deprecated function: -SimPed(...) -# It is recommended to use: -simulatePedigree(...) -} -} -\seealso{ -\code{\link{simulatePedigree}} for the updated function. -} -\keyword{deprecated} diff --git a/man/addPhantoms.Rd b/man/addPhantoms.Rd index 2981b01e..bafe5b75 100644 --- a/man/addPhantoms.Rd +++ b/man/addPhantoms.Rd @@ -4,12 +4,14 @@ \alias{addPhantoms} \title{Add Phantom Parents} \usage{ -addPhantoms(ped, verbose, pid, validation_results) +addPhantoms(ped, verbose, validation_results) } \arguments{ \item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.} \item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} + +\item{validation_results}{validation results} } \description{ This function adds phantom parents to a pedigree. diff --git a/man/calculateRelatedness.Rd b/man/calculateRelatedness.Rd index b9a8f067..6d932e63 100644 --- a/man/calculateRelatedness.Rd +++ b/man/calculateRelatedness.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/computeRelatedness.R \name{calculateRelatedness} \alias{calculateRelatedness} +\alias{related_coef} \title{Calculate Relatedness Coefficient} \usage{ calculateRelatedness( @@ -18,6 +19,8 @@ calculateRelatedness( denom_m = FALSE, ... ) + +related_coef(...) } \arguments{ \item{generations}{Number of generations back of common ancestors the pair share.} diff --git a/man/checkIDuniqueness.Rd b/man/checkIDuniqueness.Rd index 88b4dba3..6c1aebf6 100644 --- a/man/checkIDuniqueness.Rd +++ b/man/checkIDuniqueness.Rd @@ -7,9 +7,9 @@ checkIDuniqueness(ped, verbose = FALSE) } \arguments{ -\item{ped}{A pedigree object} +\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.} -\item{verbose}{A logical indicating whether to print progress messages} +\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} } \value{ A list containing the results of the check diff --git a/man/checkWithinRowDuplicates.Rd b/man/checkWithinRowDuplicates.Rd index e0ddb318..4797306c 100644 --- a/man/checkWithinRowDuplicates.Rd +++ b/man/checkWithinRowDuplicates.Rd @@ -6,6 +6,14 @@ \usage{ checkWithinRowDuplicates(ped, verbose = FALSE) } +\arguments{ +\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.} + +\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} +} +\value{ +A list containing the results of the check +} \description{ -Check for within-row duplicates (self-parents, same mom/dad) +This function checks for within-row duplicates in a pedigree. } diff --git a/man/inferRelatedness.Rd b/man/inferRelatedness.Rd index dba7a0ed..1dd8b17c 100644 --- a/man/inferRelatedness.Rd +++ b/man/inferRelatedness.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/computeRelatedness.R \name{inferRelatedness} \alias{inferRelatedness} +\alias{relatedness} \title{Infer Relatedness Coefficient} \usage{ inferRelatedness(obsR, aceA = 0.9, aceC = 0, sharedC = 0) + +relatedness(...) } \arguments{ \item{obsR}{Numeric. Observed correlation between the two groups. Must be between -1 and 1.} @@ -14,6 +17,8 @@ inferRelatedness(obsR, aceA = 0.9, aceC = 0, sharedC = 0) \item{aceC}{Numeric. Proportion of variance attributable to shared environmental variance. Must be between 0 and 1. Default is 0.} \item{sharedC}{Numeric. Proportion of shared environment shared between the two individuals. Must be between 0 (no shared environment) and 1 (completely shared environment). Default is 0.} + +\item{...}{Further named arguments that may be passed to another function.} } \value{ Numeric. The calculated relatedness coefficient (`est_r`). diff --git a/man/related_coef.Rd b/man/related_coef.Rd deleted file mode 100644 index 11112982..00000000 --- a/man/related_coef.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpGeneric.R -\name{related_coef} -\alias{related_coef} -\title{related_coef (Deprecated)} -\usage{ -related_coef(...) -} -\arguments{ -\item{...}{Arguments to be passed to `calculateRelatedness`.} -} -\value{ -The same result as calling `calculateRelatedness`. -} -\description{ -When calling this function, a warning will be issued about its deprecation. -} -\details{ -This function is a wrapper around the new `calculateRelatedness` function. -`related_coef` has been deprecated, and it's advised to use `calculateRelatedness` directly. -} -\examples{ -\dontrun{ -# This is an example of the deprecated function: -related_coef(...) -# It is recommended to use: -calculateRelatedness(...) -} -} -\seealso{ -\code{\link{calculateRelatedness}} for the updated function. -} -\keyword{deprecated} diff --git a/man/relatedness.Rd b/man/relatedness.Rd deleted file mode 100644 index f84740df..00000000 --- a/man/relatedness.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpGeneric.R -\name{relatedness} -\alias{relatedness} -\title{relatedness (Deprecated)} -\usage{ -relatedness(...) -} -\arguments{ -\item{...}{Arguments to be passed to `inferRelatedness`.} -} -\value{ -The same result as calling `inferRelatedness`. -} -\description{ -When calling this function, a warning will be issued about its deprecation. -} -\details{ -This function is a wrapper around the new `inferRelatedness` function. -`relatedness` has been deprecated, and it's advised to use `inferRelatedness` directly. -} -\examples{ -\dontrun{ -# This is an example of the deprecated function: -relatedness(...) -# It is recommended to use: -inferRelatedness(...) -} -} -\seealso{ -\code{\link{inferRelatedness}} for the updated function. -} -\keyword{deprecated} diff --git a/man/repairIDs.Rd b/man/repairIDs.Rd index 2ce49f7a..cc37d78f 100644 --- a/man/repairIDs.Rd +++ b/man/repairIDs.Rd @@ -7,9 +7,9 @@ repairIDs(ped, verbose = FALSE) } \arguments{ -\item{ped}{A pedigree object} +\item{ped}{A dataframe representing the pedigree data with columns `ID`, `dadID`, and `momID`.} -\item{verbose}{A logical indicating whether to print progress messages} +\item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} } \value{ A corrected pedigree diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd index a2e07b23..b7c28650 100644 --- a/man/simulatePedigree.Rd +++ b/man/simulatePedigree.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/simulatePedigree.R \name{simulatePedigree} \alias{simulatePedigree} +\alias{SimPed} \title{Simulate Pedigrees This function simulates "balanced" pedigrees based on a group of parameters: 1) k - Kids per couple; @@ -19,6 +20,8 @@ simulatePedigree( balancedMar = TRUE, verbose = FALSE ) + +SimPed(...) } \arguments{ \item{kpc}{Number of kids per couple. An integer >= 2 that determines how many kids each fertilized mated couple will have in the pedigree. Default value is 3. Returns an error when kpc equals 1.} @@ -36,6 +39,8 @@ simulatePedigree( \item{balancedMar}{Not fully developed yet. Always \code{TRUE} in the current version.} \item{verbose}{logical If TRUE, print progress through stages of algorithm} + +\item{...}{Additional arguments to be passed to other functions.} } \value{ A \code{data.frame} with each row representing a simulated individual. The columns are as follows: From 7f249101de4e10259cf8bd28abbc1ede8e2def2c Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 9 Apr 2025 23:04:00 -0400 Subject: [PATCH 06/35] Updatedocs --- .gitignore | 1 + R/checkIDs.R | 5 +- R/checkParents.R | 48 +- R/helpGeneric.R | 6 - R/makeLinks.R | 499 ++++++++++++++++++- benchmark_results.csv | 4 + data-raw/benchmark.R | 81 ++- man/{addPhantoms.Rd => addRowlessParents.Rd} | 8 +- tests/testthat/test-checkParents.R | 2 +- tests/testthat/test-makeLinks.R | 90 ++++ 10 files changed, 688 insertions(+), 56 deletions(-) create mode 100644 benchmark_results.csv rename man/{addPhantoms.Rd => addRowlessParents.Rd} (75%) diff --git a/.gitignore b/.gitignore index 8faabc57..dc07da73 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ ASOIAF.ged .vscode/launch.json dataRelatedPairs_new2.csv data-raw/ASOIAF_040725.ged +dataRelatedPairs.csv diff --git a/R/checkIDs.R b/R/checkIDs.R index 4c1100fd..b687f4c7 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -28,12 +28,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { } # Identify non-unique IDs - id_check <- checkIDuniqueness(ped=ped, verbose=verbose) + id_check <- checkIDuniqueness(ped = ped, verbose = verbose) if (verbose) { cat("Step 2: Checking for within row duplicats...\n") } - row_check <- checkWithinRowDuplicates(ped=ped, verbose = verbose) + row_check <- checkWithinRowDuplicates(ped = ped, verbose = verbose) validation_results <- c(id_check, row_check) @@ -166,4 +166,3 @@ checkWithinRowDuplicates <- function(ped, verbose = FALSE) { duplicated_parents_ids = if (length(duplicated_parents) > 0) unique(duplicated_parents) else NULL ) } - diff --git a/R/checkParents.R b/R/checkParents.R index 3c680e77..b80669bf 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -250,39 +250,10 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, cat("Added phantom moms for:", paste(changes$phantom_moms_added, collapse = ", "), "\n") } } - # add phantom parents + # add parents who appear in momID or dadID but are missing from ID if (parentswithoutrow) { # Add parents who appear in momID or dadID but are missing from ID - listed_parents <- unique(c(ped$momID, ped$dadID)) - listed_parents <- listed_parents[!is.na(listed_parents)] - - existing_ids <- ped$ID - missing_parents <- setdiff(listed_parents, existing_ids) - - if (length(missing_parents) > 0) { - if (verbose) { - cat("Adding parents who were listed in momID/dadID but missing from ID:\n") - print(missing_parents) - } - - for (pid in missing_parents) { - role <- unique( - c( - if (pid %in% ped$momID) "mom" else NULL, - if (pid %in% ped$dadID) "dad" else NULL - ) - ) - inferred_sex <- if ("mom" %in% role) validation_results$female_var else validation_results$male_var - - new_row <- ped[1, ] - new_row$ID <- pid - new_row$dadID <- NA - new_row$momID <- NA - new_row$sex <- inferred_sex - new_entries <- rbind(new_entries, new_row) - } - } - ped <- merge(ped, new_entries, all = TRUE) + ped <- addRowlessParents(ped = ped, verbose = verbose, validation_results = validation_results) } if (verbose) { @@ -301,13 +272,13 @@ repairParentIDs <- function(ped, verbose = FALSE) { checkParentIDs(ped = ped, verbose = verbose, repair = TRUE) } -#' Add Phantom Parents +#' Add addRowlessParents #' -#' This function adds phantom parents to a pedigree. +#' This function adds parents who appear in momID or dadID but are missing from ID #' @inheritParams checkParentIDs #' @param validation_results validation results -addPhantoms <- function(ped, verbose, validation_results) { +addRowlessParents <- function(ped, verbose, validation_results) { # Add parents who appear in momID or dadID but are missing from ID new_entries <- data.frame() @@ -339,10 +310,11 @@ addPhantoms <- function(ped, verbose, validation_results) { new_row$sex <- inferred_sex new_entries <- rbind(new_entries, new_row) } - } - ped <- merge(ped, new_entries, all = TRUE) - if (verbose) { - cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n") + + ped <- merge(ped, new_entries, all = TRUE) + if (verbose) { + cat("Added phantom parents for:", paste(new_entries$ID, collapse = ", "), "\n") + } } return(ped) } diff --git a/R/helpGeneric.R b/R/helpGeneric.R index 8b376bfb..8dae2b2c 100644 --- a/R/helpGeneric.R +++ b/R/helpGeneric.R @@ -97,9 +97,3 @@ resample <- function(x, ...) { } x[sample.int(length(x), ...)] } - - - - - - diff --git a/R/makeLinks.R b/R/makeLinks.R index d2dc6a65..930d3d2b 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -78,6 +78,9 @@ com2links <- function( # Extract individual IDs from the first available matrix. ids <- NULL + + + if (!is.null(cn_ped_matrix)) { ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) nc <- ncol(cn_ped_matrix) @@ -93,6 +96,15 @@ com2links <- function( stop("Could not extract IDs from the provided matrices.") } + + # Construct case identifier + matrix_case <- paste(sort(c( + if (!is.null(ad_ped_matrix)) "ad" else NULL, + if (!is.null(mit_ped_matrix)) "mt" else NULL, + if (!is.null(cn_ped_matrix)) "cn" else NULL + )), collapse = "-") + + # Count how many matrices are provided. sum_nulls <- sum(!is.null(ad_ped_matrix), !is.null(mit_ped_matrix), @@ -100,7 +112,7 @@ com2links <- function( na.rm = TRUE ) if (verbose) { - print(sum_nulls) + print(matrix_case) } # Extract the internal pointers (p, i, and x slots) for each provided matrix. @@ -633,6 +645,491 @@ com2links.legacy <- function( return(NULL) } +com2links.beta <- function( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + # pat_ped_matrix = NULL, + # mat_ped_matrix = NULL, + # mapa_id_file = "data_mapaID.csv", + write_buffer_size = 1000, + update_rate = 1000, + gc = TRUE, + writetodisk = TRUE, + verbose = FALSE, + legacy = FALSE, + outcome_name = "data", + drop_upper_triangular = TRUE, + ...) { + # --- Input Validations and Preprocessing --- + + # Ensure that at least one relationship matrix is provided. + if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { + stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") + } + # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(ad_ped_matrix)) { + ad_ped_matrix <- validate_and_convert_matrix( + mat = ad_ped_matrix, + name = "ad_ped_matrix" + ) + } + + # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(cn_ped_matrix)) { + cn_ped_matrix <- validate_and_convert_matrix( + mat = cn_ped_matrix, + name = "cn_ped_matrix", + ensure_symmetric = TRUE + ) + } + + # Validate and process mit_ped_matrix: convert and ensure binary values. + if (!is.null(mit_ped_matrix)) { + mit_ped_matrix <- validate_and_convert_matrix( + mat = mit_ped_matrix, + name = "mit_ped_matrix", force_binary = TRUE, + ensure_symmetric = TRUE + ) + } + + # --- Build IDs and Prepare Matrix Pointers --- + + # Extract individual IDs from the first available matrix. + ids <- NULL + + + + if (!is.null(cn_ped_matrix)) { + ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) + nc <- ncol(cn_ped_matrix) + } else if (!is.null(ad_ped_matrix)) { + ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) + nc <- ncol(ad_ped_matrix) + } else if (!is.null(mit_ped_matrix)) { + ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) + nc <- ncol(mit_ped_matrix) + } + + if (is.null(ids)) { + stop("Could not extract IDs from the provided matrices.") + } + + # --- matrix_case construction and switch dispatch --- + matrix_case <- paste(sort(c( + if (!is.null(ad_ped_matrix)) "ad" else NULL, + if (!is.null(mit_ped_matrix)) "mt" else NULL, + if (!is.null(cn_ped_matrix)) "cn" else NULL + )), collapse = "-") + + if (verbose) { + print(matrix_case) + } + + switch(matrix_case, + "ad" = process_one( + matrix = ad_ped_matrix, + rel_name = "addRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "mt" = process_one( + matrix = mit_ped_matrix, + rel_name = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn" = process_one( + matrix = cn_ped_matrix, + rel_name = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-mt" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = cn_ped_matrix, + name2 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn-mt" = process_two( + matrix1 = cn_ped_matrix, + name1 = "cnuRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn-mt" = process_all_three( + mat1 = ad_ped_matrix, + name1 = "addRel", + mat2 = mit_ped_matrix, + name2 = "mitRel", + mat3 = cn_ped_matrix, + name3 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + stop("Unsupported matrix combination") + ) +} + +process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) { + # Extract pointers and indices from the matrix. + newColPos <- matrix@p + 1L + iss <- matrix@i + 1L + x <- matrix@x + + # Initialize the related pairs file with headers. + df_relpairs <- initialize_empty_df(relNames = rel_name) + + if (writetodisk == TRUE) { + utils::write.table( + df_relpairs, + file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE + ) + + # Prepare an empty buffer for batching writes. + write_buffer <- list() + remove(df_relpairs) + } + + # Process each column in the matrix. + for (j in 1L:nc) { + ID2 <- ids[j] + + # Extract column indices + ncp <- newColPos[j] + ncpp <- newColPos[j + 1L] + cond <- ncp < ncpp + if (cond) { + vv <- ncp:(ncpp - 1L) + issvv <- iss[vv] + } + + # Create a unique set of row indices. + u <- sort(issvv) + + # If any relationships exist for this individual, build the related pairs. + if (cond) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[rel_name]] <- 0 + + if (cond) { + tds[u %in% issvv, rel_name] <- x[vv] + } + if (drop_upper_triangular == TRUE) { + tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle + } + + # Write the batch to disk or accumulate in the data frame. + if (nrow(tds) > 0) { + if (writetodisk == TRUE) { + write_buffer[[length(write_buffer) + 1]] <- tds + + if (length(write_buffer) >= write_buffer_size) { # Write in batches + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + # If not writing to disk, return the accumulated data frame. + if (writetodisk == FALSE) { + return(df_relpairs) + } else { + # Write any remaining buffered rows. + if (length(write_buffer) > 0) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + } + } + if (gc == TRUE) { + remove(newColPos, iss, x) + } +} + +process_all_three <- function( + mat1, name1, + mat2, name2, + mat3, name3, + ids, nc, + rel_pairs_file, + writetodisk, + write_buffer_size, + drop_upper_triangular, + update_rate, + verbose, + gc, + ...) { + # Extract matrix slots + p1 <- mat1@p + 1L + i1 <- mat1@i + 1L + x1 <- mat1@x + p2 <- mat2@p + 1L + i2 <- mat2@i + 1L + x2 <- mat2@x + p3 <- mat3@p + 1L + i3 <- mat3@i + 1L + x3 <- mat3@x + + relNames <- c(name1, name2, name3) + df_relpairs <- initialize_empty_df(relNames) + + if (writetodisk) { + utils::write.table(df_relpairs, file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE) + write_buffer <- list() + rm(df_relpairs) + } + + for (j in seq_len(nc)) { + ID2 <- ids[j] + + # Get index spans + v1 <- if (p1[j] < p1[j + 1L]) { + idx <- p1[j]:(p1[j + 1L] - 1L) + list(i = i1[idx], x = x1[idx]) + } else { + NULL + } + v2 <- if (p2[j] < p2[j + 1L]) { + idx <- p2[j]:(p2[j + 1L] - 1L) + list(i = i2[idx], x = x2[idx]) + } else { + NULL + } + v3 <- if (p3[j] < p3[j + 1L]) { + idx <- p3[j]:(p3[j + 1L] - 1L) + list(i = i3[idx], x = x3[idx]) + } else { + NULL + } + + # Union of index positions + u <- sort(unique(c( + if (!is.null(v1)) v1$i else NULL, + if (!is.null(v2)) v2$i else NULL, + if (!is.null(v3)) v3$i else NULL + ))) + if (length(u) > 0) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[name1]] <- if (!is.null(v1)) ifelse(u %in% v1$i, v1$x[match(u, v1$i)], 0) else 0 + tds[[name2]] <- if (!is.null(v2)) ifelse(u %in% v2$i, v2$x[match(u, v2$i)], 0) else 0 + tds[[name3]] <- if (!is.null(v3)) ifelse(u %in% v3$i, v3$x[match(u, v3$i)], 0) else 0 + + if (drop_upper_triangular) { + tds <- tds[tds$ID1 <= tds$ID2, ] + } + + if (nrow(tds) > 0) { + if (writetodisk) { + write_buffer[[length(write_buffer) + 1L]] <- tds + if (length(write_buffer) >= write_buffer_size) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + + if (!writetodisk) { + return(df_relpairs) + } else if (length(write_buffer) > 0) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + } + + invisible(NULL) +} + +process_two <- function( + matrix1, name1, + matrix2, name2, + ids, nc, + rel_pairs_file, + writetodisk, + write_buffer_size, + drop_upper_triangular, + update_rate, + verbose, + gc, + ...) { + # Extract internal slots + p1 <- matrix1@p + 1L + i1 <- matrix1@i + 1L + x1 <- matrix1@x + p2 <- matrix2@p + 1L + i2 <- matrix2@i + 1L + x2 <- matrix2@x + + relNames <- c(name1, name2) + df_relpairs <- initialize_empty_df(relNames) + + if (writetodisk) { + utils::write.table(df_relpairs, file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE) + write_buffer <- list() + rm(df_relpairs) + } + + for (j in seq_len(nc)) { + ID2 <- ids[j] + + # Get index/value slices + v1 <- if (p1[j] < p1[j + 1L]) { + idx <- p1[j]:(p1[j + 1L] - 1L) + list(i = i1[idx], x = x1[idx]) + } else { + NULL + } + v2 <- if (p2[j] < p2[j + 1L]) { + idx <- p2[j]:(p2[j + 1L] - 1L) + list(i = i2[idx], x = x2[idx]) + } else { + NULL + } + + # Union of indices from both matrices + u <- sort(unique(c( + if (!is.null(v1)) v1$i else NULL, + if (!is.null(v2)) v2$i else NULL + ))) + + if (length(u) > 0) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[name1]] <- if (!is.null(v1)) ifelse(u %in% v1$i, v1$x[match(u, v1$i)], 0) else 0 + tds[[name2]] <- if (!is.null(v2)) ifelse(u %in% v2$i, v2$x[match(u, v2$i)], 0) else 0 + + if (drop_upper_triangular) { + tds <- tds[tds$ID1 <= tds$ID2, ] + } + + if (nrow(tds) > 0) { + if (writetodisk) { + write_buffer[[length(write_buffer) + 1L]] <- tds + if (length(write_buffer) >= write_buffer_size) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + + if (!writetodisk) { + return(df_relpairs) + } else if (length(write_buffer) > 0) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + } + + invisible(NULL) +} + + #' @title validate_and_convert_matrix #' @description #' This function validates and converts a matrix to a specific format. diff --git a/benchmark_results.csv b/benchmark_results.csv new file mode 100644 index 00000000..efe84ed8 --- /dev/null +++ b/benchmark_results.csv @@ -0,0 +1,4 @@ +"expr","min","lq","mean","median","uq","max","neval" +"beta",430.4392,516.5138,574.575919,563.7353,623.24015,956.2494,100 +"regular",424.8536,507.47875,578.514,564.8453,631.97425,997.8213,100 +"legacy",580.1082,1392.24995,2086.481802,1783.02635,2686.8328,4768.9119,100 diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R index fe480cdf..536190b1 100644 --- a/data-raw/benchmark.R +++ b/data-raw/benchmark.R @@ -2,7 +2,7 @@ library(microbenchmark) library(Matrix) # library(BGmisc) # data("hazard") - +library(tidyverse) # make big data set.seed(15) @@ -10,8 +10,34 @@ Ngen <- 5 kpc <- 5 sexR <- .50 marR <- .7 -ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) - +ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% + mutate( + fam = "fam 1" + ) +set.seed(151) +Ngen <- 5 +marR <- .8 +ped2 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% + mutate( + fam = "fam 2", + ID = ID + 10000, + momID = momID + 10000, + dadID = dadID + 10000, + spID = spID + 10000 + ) +set.seed(1151) +kpc <- 8 +ped3 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% + mutate( + fam = "fam 3", + ID = ID + 20000, + momID = momID + 20000, + dadID = dadID + 20000, + spID = spID + 20000 + ) +ped <- rbind(ped, ped2) +ped <- rbind(ped, ped3) +if(FALSE){ # Define parameters component <- "additive" # Change this to test different components saveable <- FALSE # Disable saving to avoid disk I/O slowing down benchmarking @@ -52,9 +78,58 @@ benchmark_results <- microbenchmark( times = 100 # Run each method 100 times ) +summary(benchmark_results) + +lm(benchmark_results$time ~ benchmark_results$expr) %>% + summary() +# Print benchmark results +print(benchmark_results) + +# Optional: Save results to CSV for later analysis +write.csv(summary(benchmark_results), + "benchmark_results.csv", + row.names = FALSE +) # Print benchmark +} +verbose=FALSE +ad_ped_matrix <- ped2com(ped, component = "additive", adjacency_method = "direct", sparse = TRUE) +mit_ped_matrix <- ped2com(ped, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) +cn_ped_matrix <- ped2com(ped, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) +benchmark_results <- microbenchmark( + beta = { + com2links.beta( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + writetodisk = TRUE, + verbose = verbose +); file.remove("dataRelatedPairs.csv") + }, regular = { + com2links( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + writetodisk = TRUE, + verbose = verbose + ); file.remove("dataRelatedPairs.csv") + }, legacy = { + com2links( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + verbose = verbose, + legacy = TRUE + ); file.remove("dataRelatedPairs.csv") + }, + + times = 100 # Run each method 100 times +) +summary(benchmark_results) +lm(benchmark_results$time ~ benchmark_results$expr) %>% + summary() # Print benchmark results print(benchmark_results) diff --git a/man/addPhantoms.Rd b/man/addRowlessParents.Rd similarity index 75% rename from man/addPhantoms.Rd rename to man/addRowlessParents.Rd index bafe5b75..8161ee4e 100644 --- a/man/addPhantoms.Rd +++ b/man/addRowlessParents.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/checkParents.R -\name{addPhantoms} -\alias{addPhantoms} +\name{addRowlessParents} +\alias{addRowlessParents} \title{Add Phantom Parents} \usage{ -addPhantoms(ped, verbose, validation_results) +addRowlessParents(ped, verbose, validation_results) } \arguments{ \item{ped}{A dataframe representing the pedigree data with columns 'ID', 'dadID', and 'momID'.} @@ -14,5 +14,5 @@ addPhantoms(ped, verbose, validation_results) \item{validation_results}{validation results} } \description{ -This function adds phantom parents to a pedigree. +This function adds parents w } diff --git a/tests/testthat/test-checkParents.R b/tests/testthat/test-checkParents.R index 21660c4b..50916289 100644 --- a/tests/testthat/test-checkParents.R +++ b/tests/testthat/test-checkParents.R @@ -22,6 +22,6 @@ test_that("checksif single parents found correctly in ASOIAF dataset", { single_moms <- length(df_asoiaf$id[is.na(df_asoiaf$dadID) & !is.na(df_asoiaf$momID)]) expect_equal(single_moms, length(results$missing_fathers)) expect_equal(single_dads, length(results$missing_mothers)) - repaired_df <- checkParentIDs(df_asoiaf, verbose = FALSE, repair = TRUE) + repaired_df <- checkParentIDs(df_asoiaf, verbose = FALSE, repair = TRUE, parentswithoutrow = TRUE) expect_equal(nrow(repaired_df), nrow(df_asoiaf) + single_moms + single_dads) }) diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index 475f7b6b..200b8934 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -108,6 +108,17 @@ test_that("com2links legacy works", { expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(written_data))) + + result_beta <- com2links.beta( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, + writetodisk = FALSE + ) + + expect_true(is.data.frame(result_beta)) + expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result_beta))) + + result <- com2links( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, @@ -115,15 +126,94 @@ test_that("com2links legacy works", { ) expect_true(is.data.frame(result)) + expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result))) # Drop row names to avoid mismatches in expect_equal rownames(result) <- NULL rownames(written_data) <- NULL + rownames(result_beta) <- NULL # Final comparison between written versions expect_equal(written_data, result) + expect_equal(result_beta, result) }) +test_that("com2links beta works", { + data(hazard) + ad_ped_matrix <- ped2com(hazard, component = "additive", adjacency_method = "direct", sparse = TRUE) + mit_ped_matrix <- ped2com(hazard, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) + cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) + + # compare 2 + result_beta <- com2links.beta( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + writetodisk = FALSE + ) + + expect_true(is.data.frame(result_beta)) + expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result_beta))) + + + result <- com2links( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + writetodisk = FALSE + ) + + expect_true(is.data.frame(result)) + expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result))) + # Drop row names to avoid mismatches in expect_equal + rownames(result) <- NULL + rownames(result_beta) <- NULL + + # Final comparison between versions + expect_equal(result_beta, result) + + + # write to disk + result_disk <- com2links.beta( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + writetodisk = TRUE + ) + expect_true(file.exists("dataRelatedPairs.csv")) + written_data <- read.csv("dataRelatedPairs.csv") + # remove the file + expect_true(file.remove("dataRelatedPairs.csv")) + + expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(written_data))) + rownames(written_data) <- NULL + expect_equal(result_beta, written_data) + expect_equal(result, written_data) + # compare 1 + + result_beta <- com2links.beta( + mit_ped_matrix = mit_ped_matrix, + writetodisk = FALSE + ) + + expect_true(is.data.frame(result_beta)) + expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result_beta))) + + + result <- com2links( + mit_ped_matrix = mit_ped_matrix, + writetodisk = FALSE + ) + + expect_true(is.data.frame(result)) + expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result))) + # Drop row names to avoid mismatches in expect_equal + rownames(result) <- NULL + rownames(result_beta) <- NULL + + # Final comparison between versions + expect_equal(result_beta, result) +}) + + + test_that("com2links correctly handles missing matrices", { data(hazard) From 61fb620c45124b84a590f7fac0c8740a951106fa Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 00:16:11 -0400 Subject: [PATCH 07/35] renaming beta --- R/makeLinks.R | 21 ++++++++++++----- benchmark_results.csv | 6 ++--- man/addRowlessParents.Rd | 4 ++-- man/com2links.Rd | 3 --- man/com2links.legacy.Rd | 27 +++++++++++++++++++--- man/com2links.og.Rd | 41 +++++++++++++++++++++++++++++++++ tests/testthat/test-makeLinks.R | 14 +++++------ 7 files changed, 92 insertions(+), 24 deletions(-) create mode 100644 man/com2links.og.Rd diff --git a/R/makeLinks.R b/R/makeLinks.R index 930d3d2b..7bbdb082 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -20,8 +20,9 @@ #' @param ... Additional arguments to be passed to \code{\link{com2links}} #' #' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. -#' @export -com2links <- function( +#' @keywords internal + +com2links.legacy <- function( rel_pairs_file = "dataRelatedPairs.csv", ad_ped_matrix = NULL, mit_ped_matrix = mt_ped_matrix, @@ -498,7 +499,7 @@ com2links <- function( } else if (legacy) { # --- Legacy Mode --- # In legacy mode, convert matrices to the expected symmetric formats. - com2links.legacy( + com2links.og( rel_pairs_file = rel_pairs_file, ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, @@ -521,9 +522,11 @@ com2links <- function( #' Convert Pedigree Matrices to Related Pairs File (Legacy) #' @description #' This legacy function converts pedigree matrices into a related pairs file. -#' @inheritParams com2links +#' @inheritParams com2links.legacy +#' @keywords internal -com2links.legacy <- function( + +com2links.og <- function( rel_pairs_file = "dataRelatedPairs.csv", ad_ped_matrix = NULL, mit_ped_matrix = mt_ped_matrix, @@ -645,7 +648,13 @@ com2links.legacy <- function( return(NULL) } -com2links.beta <- function( +#' Convert Sparse Relationship Matrices to Kinship Links +#' @inheritParams com2links.legacy +#' @inherit com2links.legacy description +#' @inherit com2links.legacy details +#' @export + +com2links <- function( rel_pairs_file = "dataRelatedPairs.csv", ad_ped_matrix = NULL, mit_ped_matrix = mt_ped_matrix, diff --git a/benchmark_results.csv b/benchmark_results.csv index efe84ed8..192b22dd 100644 --- a/benchmark_results.csv +++ b/benchmark_results.csv @@ -1,4 +1,4 @@ "expr","min","lq","mean","median","uq","max","neval" -"beta",430.4392,516.5138,574.575919,563.7353,623.24015,956.2494,100 -"regular",424.8536,507.47875,578.514,564.8453,631.97425,997.8213,100 -"legacy",580.1082,1392.24995,2086.481802,1783.02635,2686.8328,4768.9119,100 +"beta",16.0127553,19.82506435,22.108722786,21.26392435,23.70929065,33.0821112,100 +"regular",16.1161818,19.80905795,22.26289046694,21.25613265,24.39315385,33.3530921,100 +"legacy",32.0135859,39.7116454,43.55598528601,42.62947545,47.32561085,65.684218301,100 diff --git a/man/addRowlessParents.Rd b/man/addRowlessParents.Rd index 8161ee4e..83132805 100644 --- a/man/addRowlessParents.Rd +++ b/man/addRowlessParents.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/checkParents.R \name{addRowlessParents} \alias{addRowlessParents} -\title{Add Phantom Parents} +\title{Add addRowlessParents} \usage{ addRowlessParents(ped, verbose, validation_results) } @@ -14,5 +14,5 @@ addRowlessParents(ped, verbose, validation_results) \item{validation_results}{validation results} } \description{ -This function adds parents w +This function adds parents who appear in momID or dadID but are missing from ID } diff --git a/man/com2links.Rd b/man/com2links.Rd index 9dae2f09..e9c7ac2b 100644 --- a/man/com2links.Rd +++ b/man/com2links.Rd @@ -50,9 +50,6 @@ com2links( \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } -\value{ -A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. -} \description{ This function processes one or more sparse relationship components (additive, mitochondrial, and common nuclear) and converts them into kinship link pairs. The resulting related pairs are diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd index 757117a1..a523a106 100644 --- a/man/com2links.legacy.Rd +++ b/man/com2links.legacy.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/makeLinks.R \name{com2links.legacy} \alias{com2links.legacy} -\title{Convert Pedigree Matrices to Related Pairs File (Legacy)} +\title{Convert Sparse Relationship Matrices to Kinship Links} \usage{ com2links.legacy( rel_pairs_file = "dataRelatedPairs.csv", @@ -10,9 +10,14 @@ com2links.legacy( mit_ped_matrix = mt_ped_matrix, mt_ped_matrix = NULL, cn_ped_matrix = NULL, - update_rate = 500, + write_buffer_size = 1000, + update_rate = 1000, + gc = TRUE, + writetodisk = TRUE, verbose = FALSE, + legacy = FALSE, outcome_name = "data", + drop_upper_triangular = TRUE, ... ) } @@ -27,14 +32,30 @@ com2links.legacy( \item{cn_ped_matrix}{Matrix of common nuclear relatedness coefficients.} +\item{write_buffer_size}{Number of related pairs to write to disk at a time.} + \item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.} +\item{gc}{Logical. If TRUE, performs garbage collection via \code{\link{gc}} to free memory.} + +\item{writetodisk}{Logical. If TRUE, writes the related pairs to disk; if FALSE, returns a data frame.} + \item{verbose}{Logical. If TRUE, prints progress messages.} +\item{legacy}{Logical. If TRUE, uses the legacy branch of the function.} + \item{outcome_name}{Character string representing the outcome name (used in file naming).} +\item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.} + \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } +\value{ +A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. +} \description{ -This legacy function converts pedigree matrices into a related pairs file. +This function processes one or more sparse relationship components (additive, mitochondrial, +and common nuclear) and converts them into kinship link pairs. The resulting related pairs are +either returned as a data frame or written to disk in CSV format. } +\keyword{internal} diff --git a/man/com2links.og.Rd b/man/com2links.og.Rd new file mode 100644 index 00000000..b6c3d71b --- /dev/null +++ b/man/com2links.og.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/makeLinks.R +\name{com2links.og} +\alias{com2links.og} +\title{Convert Pedigree Matrices to Related Pairs File (Legacy)} +\usage{ +com2links.og( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + update_rate = 500, + verbose = FALSE, + outcome_name = "data", + ... +) +} +\arguments{ +\item{rel_pairs_file}{File path to write related pairs to (CSV format).} + +\item{ad_ped_matrix}{Matrix of additive genetic relatedness coefficients.} + +\item{mit_ped_matrix}{Matrix of mitochondrial relatedness coefficients. Alias: \code{mt_ped_matrix}.} + +\item{mt_ped_matrix}{Matrix of mitochondrial relatedness coefficients.} + +\item{cn_ped_matrix}{Matrix of common nuclear relatedness coefficients.} + +\item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.} + +\item{verbose}{Logical. If TRUE, prints progress messages.} + +\item{outcome_name}{Character string representing the outcome name (used in file naming).} + +\item{...}{Additional arguments to be passed to \code{\link{com2links}}} +} +\description{ +This legacy function converts pedigree matrices into a related pairs file. +} +\keyword{internal} diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index 200b8934..a2666f2e 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -95,7 +95,7 @@ test_that("com2links legacy works", { mit_ped_matrix <- ped2com(hazard, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) - resultlegacy <- com2links( + resultlegacy <- com2links.legacy( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, legacy = TRUE @@ -109,7 +109,7 @@ test_that("com2links legacy works", { expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(written_data))) - result_beta <- com2links.beta( + result_beta <- com2links( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE @@ -119,7 +119,7 @@ test_that("com2links legacy works", { expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result_beta))) - result <- com2links( + result <- com2links.legacy( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE @@ -145,7 +145,7 @@ test_that("com2links beta works", { cn_ped_matrix <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) # compare 2 - result_beta <- com2links.beta( + result_beta <- com2links( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, writetodisk = FALSE @@ -155,7 +155,7 @@ test_that("com2links beta works", { expect_true(all(c("ID1", "ID2", "addRel", "mitRel") %in% colnames(result_beta))) - result <- com2links( + result <- com2links.legacy( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, writetodisk = FALSE @@ -172,7 +172,7 @@ test_that("com2links beta works", { # write to disk - result_disk <- com2links.beta( + result_disk <- com2links( ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, writetodisk = TRUE @@ -188,7 +188,7 @@ test_that("com2links beta works", { expect_equal(result, written_data) # compare 1 - result_beta <- com2links.beta( + result_beta <- com2links( mit_ped_matrix = mit_ped_matrix, writetodisk = FALSE ) From 2c0961cc070dda438c5839cdd0dc46bcf32b5f6a Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 09:55:57 -0400 Subject: [PATCH 08/35] Update .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index dc07da73..f84ea082 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,7 @@ tests/testthat/Rplots.pdf *.ASOIAF.ged ASOIAF.ged *.Rproj - +benchmark_results.csv .vscode/launch.json dataRelatedPairs_new2.csv data-raw/ASOIAF_040725.ged From edf27455ba0f12c93361cb0be118329e1afeeeef Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 10:03:16 -0400 Subject: [PATCH 09/35] reorder coms --- R/makeLinks.R | 406 +++++++++++++++++++++++++------------------------- 1 file changed, 203 insertions(+), 203 deletions(-) diff --git a/R/makeLinks.R b/R/makeLinks.R index 7bbdb082..2894a3bf 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -20,7 +20,208 @@ #' @param ... Additional arguments to be passed to \code{\link{com2links}} #' #' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. -#' @keywords internal +#' @export com2links + +com2links <- function( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + # pat_ped_matrix = NULL, + # mat_ped_matrix = NULL, + # mapa_id_file = "data_mapaID.csv", + write_buffer_size = 1000, + update_rate = 1000, + gc = TRUE, + writetodisk = TRUE, + verbose = FALSE, + legacy = FALSE, + outcome_name = "data", + drop_upper_triangular = TRUE, + ...) { + # --- Input Validations and Preprocessing --- + + # Ensure that at least one relationship matrix is provided. + if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { + stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") + } + # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(ad_ped_matrix)) { + ad_ped_matrix <- validate_and_convert_matrix( + mat = ad_ped_matrix, + name = "ad_ped_matrix" + ) + } + + # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(cn_ped_matrix)) { + cn_ped_matrix <- validate_and_convert_matrix( + mat = cn_ped_matrix, + name = "cn_ped_matrix", + ensure_symmetric = TRUE + ) + } + + # Validate and process mit_ped_matrix: convert and ensure binary values. + if (!is.null(mit_ped_matrix)) { + mit_ped_matrix <- validate_and_convert_matrix( + mat = mit_ped_matrix, + name = "mit_ped_matrix", force_binary = TRUE, + ensure_symmetric = TRUE + ) + } + + # --- Build IDs and Prepare Matrix Pointers --- + + # Extract individual IDs from the first available matrix. + ids <- NULL + + + + if (!is.null(cn_ped_matrix)) { + ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) + nc <- ncol(cn_ped_matrix) + } else if (!is.null(ad_ped_matrix)) { + ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) + nc <- ncol(ad_ped_matrix) + } else if (!is.null(mit_ped_matrix)) { + ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) + nc <- ncol(mit_ped_matrix) + } + + if (is.null(ids)) { + stop("Could not extract IDs from the provided matrices.") + } + + # --- matrix_case construction and switch dispatch --- + matrix_case <- paste(sort(c( + if (!is.null(ad_ped_matrix)) "ad" else NULL, + if (!is.null(mit_ped_matrix)) "mt" else NULL, + if (!is.null(cn_ped_matrix)) "cn" else NULL + )), collapse = "-") + + if (verbose) { + print(matrix_case) + } + + switch(matrix_case, + "ad" = process_one( + matrix = ad_ped_matrix, + rel_name = "addRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "mt" = process_one( + matrix = mit_ped_matrix, + rel_name = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn" = process_one( + matrix = cn_ped_matrix, + rel_name = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-mt" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = cn_ped_matrix, + name2 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn-mt" = process_two( + matrix1 = cn_ped_matrix, + name1 = "cnuRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn-mt" = process_all_three( + mat1 = ad_ped_matrix, + name1 = "addRel", + mat2 = mit_ped_matrix, + name2 = "mitRel", + mat3 = cn_ped_matrix, + name3 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + stop("Unsupported matrix combination") + ) +} +#' Convert Sparse Relationship Matrices to Kinship Links +#' @inheritParams com2links +#' @inherit com2links description +#' @inherit com2links details +#' @keyword internal com2links.legacy <- function( rel_pairs_file = "dataRelatedPairs.csv", @@ -522,7 +723,7 @@ com2links.legacy <- function( #' Convert Pedigree Matrices to Related Pairs File (Legacy) #' @description #' This legacy function converts pedigree matrices into a related pairs file. -#' @inheritParams com2links.legacy +#' @inheritParams com2links #' @keywords internal @@ -648,207 +849,6 @@ com2links.og <- function( return(NULL) } -#' Convert Sparse Relationship Matrices to Kinship Links -#' @inheritParams com2links.legacy -#' @inherit com2links.legacy description -#' @inherit com2links.legacy details -#' @export - -com2links <- function( - rel_pairs_file = "dataRelatedPairs.csv", - ad_ped_matrix = NULL, - mit_ped_matrix = mt_ped_matrix, - mt_ped_matrix = NULL, - cn_ped_matrix = NULL, - # pat_ped_matrix = NULL, - # mat_ped_matrix = NULL, - # mapa_id_file = "data_mapaID.csv", - write_buffer_size = 1000, - update_rate = 1000, - gc = TRUE, - writetodisk = TRUE, - verbose = FALSE, - legacy = FALSE, - outcome_name = "data", - drop_upper_triangular = TRUE, - ...) { - # --- Input Validations and Preprocessing --- - - # Ensure that at least one relationship matrix is provided. - if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { - stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") - } - # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(ad_ped_matrix)) { - ad_ped_matrix <- validate_and_convert_matrix( - mat = ad_ped_matrix, - name = "ad_ped_matrix" - ) - } - - # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(cn_ped_matrix)) { - cn_ped_matrix <- validate_and_convert_matrix( - mat = cn_ped_matrix, - name = "cn_ped_matrix", - ensure_symmetric = TRUE - ) - } - - # Validate and process mit_ped_matrix: convert and ensure binary values. - if (!is.null(mit_ped_matrix)) { - mit_ped_matrix <- validate_and_convert_matrix( - mat = mit_ped_matrix, - name = "mit_ped_matrix", force_binary = TRUE, - ensure_symmetric = TRUE - ) - } - - # --- Build IDs and Prepare Matrix Pointers --- - - # Extract individual IDs from the first available matrix. - ids <- NULL - - - - if (!is.null(cn_ped_matrix)) { - ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) - nc <- ncol(cn_ped_matrix) - } else if (!is.null(ad_ped_matrix)) { - ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) - nc <- ncol(ad_ped_matrix) - } else if (!is.null(mit_ped_matrix)) { - ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) - nc <- ncol(mit_ped_matrix) - } - - if (is.null(ids)) { - stop("Could not extract IDs from the provided matrices.") - } - - # --- matrix_case construction and switch dispatch --- - matrix_case <- paste(sort(c( - if (!is.null(ad_ped_matrix)) "ad" else NULL, - if (!is.null(mit_ped_matrix)) "mt" else NULL, - if (!is.null(cn_ped_matrix)) "cn" else NULL - )), collapse = "-") - - if (verbose) { - print(matrix_case) - } - - switch(matrix_case, - "ad" = process_one( - matrix = ad_ped_matrix, - rel_name = "addRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "mt" = process_one( - matrix = mit_ped_matrix, - rel_name = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "cn" = process_one( - matrix = cn_ped_matrix, - rel_name = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-mt" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = cn_ped_matrix, - name2 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "cn-mt" = process_two( - matrix1 = cn_ped_matrix, - name1 = "cnuRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn-mt" = process_all_three( - mat1 = ad_ped_matrix, - name1 = "addRel", - mat2 = mit_ped_matrix, - name2 = "mitRel", - mat3 = cn_ped_matrix, - name3 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - stop("Unsupported matrix combination") - ) -} process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) { # Extract pointers and indices from the matrix. From 39fde6cb5f2c3d9309ed374b224d4e84f0a56fda Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 10:03:51 -0400 Subject: [PATCH 10/35] Delete benchmark_results.csv --- benchmark_results.csv | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 benchmark_results.csv diff --git a/benchmark_results.csv b/benchmark_results.csv deleted file mode 100644 index 192b22dd..00000000 --- a/benchmark_results.csv +++ /dev/null @@ -1,4 +0,0 @@ -"expr","min","lq","mean","median","uq","max","neval" -"beta",16.0127553,19.82506435,22.108722786,21.26392435,23.70929065,33.0821112,100 -"regular",16.1161818,19.80905795,22.26289046694,21.25613265,24.39315385,33.3530921,100 -"legacy",32.0135859,39.7116454,43.55598528601,42.62947545,47.32561085,65.684218301,100 From 015c32abfa9461162f0b9c5ffc0f7f47ca3bc1cb Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 10:03:16 -0400 Subject: [PATCH 11/35] reorder coms --- R/makeLinks.R | 403 ++++++++++++++++++++-------------------- man/com2links.Rd | 3 + man/com2links.legacy.Rd | 3 - 3 files changed, 204 insertions(+), 205 deletions(-) diff --git a/R/makeLinks.R b/R/makeLinks.R index 7bbdb082..f0bee11e 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -20,8 +20,208 @@ #' @param ... Additional arguments to be passed to \code{\link{com2links}} #' #' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. +#' @export com2links + +com2links <- function( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + # pat_ped_matrix = NULL, + # mat_ped_matrix = NULL, + # mapa_id_file = "data_mapaID.csv", + write_buffer_size = 1000, + update_rate = 1000, + gc = TRUE, + writetodisk = TRUE, + verbose = FALSE, + legacy = FALSE, + outcome_name = "data", + drop_upper_triangular = TRUE, + ...) { + # --- Input Validations and Preprocessing --- + + # Ensure that at least one relationship matrix is provided. + if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { + stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") + } + # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(ad_ped_matrix)) { + ad_ped_matrix <- validate_and_convert_matrix( + mat = ad_ped_matrix, + name = "ad_ped_matrix" + ) + } + + # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(cn_ped_matrix)) { + cn_ped_matrix <- validate_and_convert_matrix( + mat = cn_ped_matrix, + name = "cn_ped_matrix", + ensure_symmetric = TRUE + ) + } + + # Validate and process mit_ped_matrix: convert and ensure binary values. + if (!is.null(mit_ped_matrix)) { + mit_ped_matrix <- validate_and_convert_matrix( + mat = mit_ped_matrix, + name = "mit_ped_matrix", force_binary = TRUE, + ensure_symmetric = TRUE + ) + } + + # --- Build IDs and Prepare Matrix Pointers --- + + # Extract individual IDs from the first available matrix. + ids <- NULL + + + + if (!is.null(cn_ped_matrix)) { + ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) + nc <- ncol(cn_ped_matrix) + } else if (!is.null(ad_ped_matrix)) { + ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) + nc <- ncol(ad_ped_matrix) + } else if (!is.null(mit_ped_matrix)) { + ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) + nc <- ncol(mit_ped_matrix) + } + + if (is.null(ids)) { + stop("Could not extract IDs from the provided matrices.") + } + + # --- matrix_case construction and switch dispatch --- + matrix_case <- paste(sort(c( + if (!is.null(ad_ped_matrix)) "ad" else NULL, + if (!is.null(mit_ped_matrix)) "mt" else NULL, + if (!is.null(cn_ped_matrix)) "cn" else NULL + )), collapse = "-") + + if (verbose) { + print(matrix_case) + } + + switch(matrix_case, + "ad" = process_one( + matrix = ad_ped_matrix, + rel_name = "addRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "mt" = process_one( + matrix = mit_ped_matrix, + rel_name = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn" = process_one( + matrix = cn_ped_matrix, + rel_name = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-mt" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = cn_ped_matrix, + name2 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn-mt" = process_two( + matrix1 = cn_ped_matrix, + name1 = "cnuRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn-mt" = process_all_three( + mat1 = ad_ped_matrix, + name1 = "addRel", + mat2 = mit_ped_matrix, + name2 = "mitRel", + mat3 = cn_ped_matrix, + name3 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + stop("Unsupported matrix combination") + ) +} +#' Convert Sparse Relationship Matrices to Kinship Links +#' @inheritParams com2links #' @keywords internal + com2links.legacy <- function( rel_pairs_file = "dataRelatedPairs.csv", ad_ped_matrix = NULL, @@ -522,7 +722,7 @@ com2links.legacy <- function( #' Convert Pedigree Matrices to Related Pairs File (Legacy) #' @description #' This legacy function converts pedigree matrices into a related pairs file. -#' @inheritParams com2links.legacy +#' @inheritParams com2links #' @keywords internal @@ -648,207 +848,6 @@ com2links.og <- function( return(NULL) } -#' Convert Sparse Relationship Matrices to Kinship Links -#' @inheritParams com2links.legacy -#' @inherit com2links.legacy description -#' @inherit com2links.legacy details -#' @export - -com2links <- function( - rel_pairs_file = "dataRelatedPairs.csv", - ad_ped_matrix = NULL, - mit_ped_matrix = mt_ped_matrix, - mt_ped_matrix = NULL, - cn_ped_matrix = NULL, - # pat_ped_matrix = NULL, - # mat_ped_matrix = NULL, - # mapa_id_file = "data_mapaID.csv", - write_buffer_size = 1000, - update_rate = 1000, - gc = TRUE, - writetodisk = TRUE, - verbose = FALSE, - legacy = FALSE, - outcome_name = "data", - drop_upper_triangular = TRUE, - ...) { - # --- Input Validations and Preprocessing --- - - # Ensure that at least one relationship matrix is provided. - if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { - stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") - } - # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(ad_ped_matrix)) { - ad_ped_matrix <- validate_and_convert_matrix( - mat = ad_ped_matrix, - name = "ad_ped_matrix" - ) - } - - # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(cn_ped_matrix)) { - cn_ped_matrix <- validate_and_convert_matrix( - mat = cn_ped_matrix, - name = "cn_ped_matrix", - ensure_symmetric = TRUE - ) - } - - # Validate and process mit_ped_matrix: convert and ensure binary values. - if (!is.null(mit_ped_matrix)) { - mit_ped_matrix <- validate_and_convert_matrix( - mat = mit_ped_matrix, - name = "mit_ped_matrix", force_binary = TRUE, - ensure_symmetric = TRUE - ) - } - - # --- Build IDs and Prepare Matrix Pointers --- - - # Extract individual IDs from the first available matrix. - ids <- NULL - - - - if (!is.null(cn_ped_matrix)) { - ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) - nc <- ncol(cn_ped_matrix) - } else if (!is.null(ad_ped_matrix)) { - ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) - nc <- ncol(ad_ped_matrix) - } else if (!is.null(mit_ped_matrix)) { - ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) - nc <- ncol(mit_ped_matrix) - } - - if (is.null(ids)) { - stop("Could not extract IDs from the provided matrices.") - } - - # --- matrix_case construction and switch dispatch --- - matrix_case <- paste(sort(c( - if (!is.null(ad_ped_matrix)) "ad" else NULL, - if (!is.null(mit_ped_matrix)) "mt" else NULL, - if (!is.null(cn_ped_matrix)) "cn" else NULL - )), collapse = "-") - - if (verbose) { - print(matrix_case) - } - - switch(matrix_case, - "ad" = process_one( - matrix = ad_ped_matrix, - rel_name = "addRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "mt" = process_one( - matrix = mit_ped_matrix, - rel_name = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "cn" = process_one( - matrix = cn_ped_matrix, - rel_name = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-mt" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = cn_ped_matrix, - name2 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "cn-mt" = process_two( - matrix1 = cn_ped_matrix, - name1 = "cnuRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn-mt" = process_all_three( - mat1 = ad_ped_matrix, - name1 = "addRel", - mat2 = mit_ped_matrix, - name2 = "mitRel", - mat3 = cn_ped_matrix, - name3 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - stop("Unsupported matrix combination") - ) -} process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) { # Extract pointers and indices from the matrix. diff --git a/man/com2links.Rd b/man/com2links.Rd index e9c7ac2b..9dae2f09 100644 --- a/man/com2links.Rd +++ b/man/com2links.Rd @@ -50,6 +50,9 @@ com2links( \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } +\value{ +A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. +} \description{ This function processes one or more sparse relationship components (additive, mitochondrial, and common nuclear) and converts them into kinship link pairs. The resulting related pairs are diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd index a523a106..bf4bc6e4 100644 --- a/man/com2links.legacy.Rd +++ b/man/com2links.legacy.Rd @@ -50,9 +50,6 @@ com2links.legacy( \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } -\value{ -A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. -} \description{ This function processes one or more sparse relationship components (additive, mitochondrial, and common nuclear) and converts them into kinship link pairs. The resulting related pairs are From 0850662838bae94e354bd4898d2ae71c8682770e Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 10:26:25 -0400 Subject: [PATCH 12/35] Update test-makeLinks.R --- tests/testthat/test-makeLinks.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index a2666f2e..51bdcb49 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -25,6 +25,18 @@ test_that("com2links produces correct output with a single relationship matrix ( expect_true(all(result$addRel >= 0)) # Relatedness values should be non-negative }) +test_that("com2links produces correct output with cn_ped_matrix", { + data(ASOIAF) + cn_ped_matrix <- ped2mit(ASOIAF, sparse = TRUE) + + result <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) + + expect_true(is.data.frame(result)) + expect_true(all(c("ID1", "ID2", "cnRel") %in% colnames(result))) + expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel + expect_true(all(result$cnRel >= 0)) # Relatedness values should be non-negative +}) + test_that("com2links produces correct output with mt_ped_matrix", { data(hazard) mit_ped_matrix <- ped2mit(hazard, sparse = TRUE) @@ -34,7 +46,7 @@ test_that("com2links produces correct output with mt_ped_matrix", { expect_true(is.data.frame(result)) expect_true(all(c("ID1", "ID2", "mitRel") %in% colnames(result))) expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel - expect_true(all(result$addRel >= 0)) # Relatedness values should be non-negative + expect_true(all(result$mitRel %in% c(0, 1))) # Mitochondrial should be binary }) test_that("com2links processes multiple matrices correctly (hazard dataset)", { From c5eabef3ccce44d39580740a9a6799ea8ceced08 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 10:58:00 -0400 Subject: [PATCH 13/35] splitting links into legacy --- R/makeLinks.R | 635 +------------------------------- R/makeLinkslegacy.R | 631 +++++++++++++++++++++++++++++++ tests/testthat/test-makeLinks.R | 55 ++- 3 files changed, 684 insertions(+), 637 deletions(-) create mode 100644 R/makeLinkslegacy.R diff --git a/R/makeLinks.R b/R/makeLinks.R index f0bee11e..0156717c 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -222,632 +222,6 @@ com2links <- function( #' @keywords internal -com2links.legacy <- function( - rel_pairs_file = "dataRelatedPairs.csv", - ad_ped_matrix = NULL, - mit_ped_matrix = mt_ped_matrix, - mt_ped_matrix = NULL, - cn_ped_matrix = NULL, - # pat_ped_matrix = NULL, - # mat_ped_matrix = NULL, - # mapa_id_file = "data_mapaID.csv", - write_buffer_size = 1000, - update_rate = 1000, - gc = TRUE, - writetodisk = TRUE, - verbose = FALSE, - legacy = FALSE, - outcome_name = "data", - drop_upper_triangular = TRUE, - ...) { - # Non-legacy mode processing - - if (!legacy) { - # --- Input Validations and Preprocessing --- - - # Ensure that at least one relationship matrix is provided. - if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { - stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") - } - # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(ad_ped_matrix)) { - ad_ped_matrix <- validate_and_convert_matrix( - mat = ad_ped_matrix, - name = "ad_ped_matrix" - ) - } - - # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. - if (!is.null(cn_ped_matrix)) { - cn_ped_matrix <- validate_and_convert_matrix( - mat = cn_ped_matrix, - name = "cn_ped_matrix", - ensure_symmetric = TRUE - ) - } - - # Validate and process mit_ped_matrix: convert and ensure binary values. - if (!is.null(mit_ped_matrix)) { - mit_ped_matrix <- validate_and_convert_matrix( - mat = mit_ped_matrix, - name = "mit_ped_matrix", force_binary = TRUE, - ensure_symmetric = TRUE - ) - } - - # --- Build IDs and Prepare Matrix Pointers --- - - # Extract individual IDs from the first available matrix. - ids <- NULL - - - - if (!is.null(cn_ped_matrix)) { - ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) - nc <- ncol(cn_ped_matrix) - } else if (!is.null(ad_ped_matrix)) { - ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) - nc <- ncol(ad_ped_matrix) - } else if (!is.null(mit_ped_matrix)) { - ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) - nc <- ncol(mit_ped_matrix) - } - - if (is.null(ids)) { - stop("Could not extract IDs from the provided matrices.") - } - - - # Construct case identifier - matrix_case <- paste(sort(c( - if (!is.null(ad_ped_matrix)) "ad" else NULL, - if (!is.null(mit_ped_matrix)) "mt" else NULL, - if (!is.null(cn_ped_matrix)) "cn" else NULL - )), collapse = "-") - - - # Count how many matrices are provided. - sum_nulls <- sum(!is.null(ad_ped_matrix), - !is.null(mit_ped_matrix), - !is.null(cn_ped_matrix), - na.rm = TRUE - ) - if (verbose) { - print(matrix_case) - } - - # Extract the internal pointers (p, i, and x slots) for each provided matrix. - if (!is.null(ad_ped_matrix)) { - ad_ped_p <- ad_ped_matrix@p + 1L - ad_ped_i <- ad_ped_matrix@i + 1L - ad_ped_x <- ad_ped_matrix@x - } - if (!is.null(mit_ped_matrix)) { - mt_p <- mit_ped_matrix@p + 1L - mt_i <- mit_ped_matrix@i + 1L - mt_x <- mit_ped_matrix@x - } - if (!is.null(cn_ped_matrix)) { - cn_p <- cn_ped_matrix@p + 1L - cn_i <- cn_ped_matrix@i + 1L - cn_x <- cn_ped_matrix@x - } - - # --- Process Based on the Number of Provided Matrices --- - # --- Case: All Three Matrices Provided --- - if (sum_nulls == 3) { - # Set pointers for all three matrices. - newColPos1 <- ad_ped_p - iss1 <- ad_ped_i - x1 <- ad_ped_x - - newColPos2 <- mt_p - iss2 <- mt_i - x2 <- mt_x - - newColPos3 <- cn_p - iss3 <- cn_i - x3 <- cn_x - - # Define relationship column names. - relNames <- c("addRel", "mitRel", "cnuRel") - - # Optionally remove the original pointers to free memory. - if (gc == TRUE) { - remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x, cn_p, cn_i, cn_x) - } - if (verbose) { - message("All 3 matrix is present") - } - - # File names - # rel_pairs_file <- paste0(outcome_name, "_dataRelatedPairs.csv") - # mapa_id_file <- paste0(outcome_name, "_data_mapaID.csv") - - # Initialize the related pairs file with headers. - df_relpairs <- initialize_empty_df(relNames = relNames) - - # Write the headers to the related pairs file. - if (writetodisk == TRUE) { - utils::write.table( - df_relpairs, - file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE - ) - - # Prepare an empty buffer for batching writes. - write_buffer <- list() - remove(df_relpairs) - } - - # Loop over each column (individual) in the matrix. - for (j in 1L:nc) { - ID2 <- ids[j] - - # Extract column indices for the 1st component - ncp1 <- newColPos1[j] - ncp1p <- newColPos1[j + 1L] - cond1 <- ncp1 < ncp1p - if (cond1) { - vv1 <- ncp1:(ncp1p - 1L) - iss1vv <- iss1[vv1] - } - # Extract indices for the 2nd component - ncp2 <- newColPos2[j] - ncp2p <- newColPos2[j + 1L] - cond2 <- ncp2 < ncp2p - if (cond2) { - vv2 <- ncp2:(ncp2p - 1L) - iss2vv <- iss2[vv2] - } - - # Extract indices for the 3rd component - ncp3 <- newColPos3[j] - ncp3p <- newColPos3[j + 1L] - cond3 <- ncp3 < ncp3p - if (cond3) { - vv3 <- ncp3:(ncp3p - 1L) - iss3vv <- iss3[vv3] - } - - # Create a unique, sorted set of row indices from all provided matrices. - u <- sort(igraph::union(igraph::union(if (cond1) { - iss1vv - }, if (cond2) { - iss2vv - }), if (cond3) { - iss3vv - })) - - # If any relationships exist for this individual, build the related pairs. - if (cond1 || cond2 || cond3) { - ID1 <- ids[u] - tds <- data.frame(ID1 = ID1, ID2 = ID2) - tds[[relNames[1]]] <- 0 - tds[[relNames[2]]] <- 0 - tds[[relNames[3]]] <- 0 - - # Assign the relationship coefficients from each matrix. - if (cond1) { - tds[u %in% iss1vv, relNames[1]] <- x1[vv1] - } - if (cond2) { - tds[u %in% iss2vv, relNames[2]] <- x2[vv2] - } - if (cond3) { - tds[u %in% iss3vv, relNames[3]] <- x3[vv3] - } - - # Optionally drop upper-triangular entries. - if (drop_upper_triangular == TRUE) { - tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle - } - - # Write the batch to disk or accumulate in the data frame. - if (nrow(tds) > 0) { - if (writetodisk == TRUE) { - write_buffer[[length(write_buffer) + 1]] <- tds - - if (length(write_buffer) >= write_buffer_size) { # Write in batches - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) - write_buffer <- list() - } - } else { - df_relpairs <- rbind(df_relpairs, tds) - } - } - } - if (verbose && (j %% update_rate == 0L)) { - cat("Done with", j, "of", nc, "\n") - } - } - } else if (sum_nulls == 2) { - # --- Case: Two Matrices Provided --- - # Set pointers and relationship names based on which matrix is missing. - - if (is.null(ad_ped_matrix)) { - newColPos1 <- mt_p - iss1 <- mt_i - x1 <- mt_x - newColPos2 <- cn_p - iss2 <- cn_i - x2 <- cn_x - relNames <- c("mitRel", "cnuRel") - if (gc == TRUE) { - remove(mt_p, mt_i, mt_x, cn_p, cn_i, cn_x) - } - } - if (is.null(mit_ped_matrix)) { - newColPos1 <- ad_ped_p - iss1 <- ad_ped_i - x1 <- ad_ped_x - newColPos2 <- cn_p - iss2 <- cn_i - x2 <- cn_x - relNames <- c("addRel", "cnuRel") - if (gc == TRUE) { - remove(ad_ped_p, ad_ped_i, ad_ped_x, cn_p, cn_i, cn_x) - } - } - if (is.null(cn_ped_matrix)) { - newColPos1 <- ad_ped_p - iss1 <- ad_ped_i - x1 <- ad_ped_x - newColPos2 <- mt_p - iss2 <- mt_i - x2 <- mt_x - relNames <- c("addRel", "mitRel") - if (gc == TRUE) { - remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x) - } - } - - # Initialize the related pairs file with the appropriate headers. - df_relpairs <- initialize_empty_df(relNames = relNames) - - if (writetodisk == TRUE) { - utils::write.table( - df_relpairs, - file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE - ) - # initial buffer - write_buffer <- list() - remove(df_relpairs) - } - - # Process each column to extract relationships. - for (j in 1L:nc) { - ID2 <- ids[j] - - # Extract indices from the first matrix. - ncp1 <- newColPos1[j] - ncp1p <- newColPos1[j + 1L] - cond1 <- ncp1 < ncp1p - if (cond1) { - vv1 <- ncp1:(ncp1p - 1L) - iss1vv <- iss1[vv1] - } - # Extract indices from the second matrix. - ncp2 <- newColPos2[j] - ncp2p <- newColPos2[j + 1L] - cond2 <- ncp2 < ncp2p - if (cond2) { - vv2 <- ncp2:(ncp2p - 1L) - iss2vv <- iss2[vv2] - } - - # Merge the indices from both matrices. - u <- sort(igraph::union(if (cond1) { - iss1vv - }, if (cond2) { - iss2vv - })) - - # Create related pairs if relationships are found. - if (cond1 || cond2) { - ID1 <- ids[u] - tds <- data.frame(ID1 = ID1, ID2 = ID2) - tds[[relNames[1]]] <- 0 - tds[[relNames[2]]] <- 0 - - if (cond1) { - tds[u %in% iss1vv, relNames[1]] <- x1[vv1] - } - if (cond2) { - tds[u %in% iss2vv, relNames[2]] <- x2[vv2] - } - if (drop_upper_triangular == TRUE) { - tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle - } - - # Write the batch to disk or accumulate in the data frame. - if (nrow(tds) > 0) { - if (writetodisk == TRUE) { - write_buffer[[length(write_buffer) + 1]] <- tds - - if (length(write_buffer) >= write_buffer_size) { # Write in batches - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) - write_buffer <- list() - } - } else { - df_relpairs <- rbind(df_relpairs, tds) - } - } - } - if (verbose && (j %% update_rate == 0L)) { - cat("Done with", j, "of", nc, "\n") - } - } - } else if (sum_nulls == 1) { - # --- Case: Only One Matrix Provided --- - if (verbose) { - message("Only one matrix is present") - } - if (!is.null(ad_ped_matrix)) { - newColPos1 <- ad_ped_p - iss1 <- ad_ped_i - x1 <- ad_ped_x - relNames <- c("addRel") - if (gc == TRUE) { - remove(ad_ped_p, ad_ped_i, ad_ped_x) - } - } - if (!is.null(mit_ped_matrix)) { - newColPos1 <- mt_p - iss1 <- mt_i - x1 <- mt_x - relNames <- c("mitRel") - if (gc == TRUE) { - remove(mt_p, mt_i, mt_x) - } - } - if (!is.null(cn_ped_matrix)) { - newColPos1 <- cn_p - iss1 <- cn_i - x1 <- cn_x - relNames <- c("cnuRel") - if (gc == TRUE) { - remove(cn_p, cn_i, cn_x) - } - } - - # Initialize the related pairs file. - df_relpairs <- initialize_empty_df(relNames = relNames) - - if (writetodisk == TRUE) { - utils::write.table( - df_relpairs, - file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE - ) - - # initial buffer - write_buffer <- list() - - remove(df_relpairs) - } - - # Process each column. - for (j in 1L:nc) { - ID2 <- ids[j] - # Extract column indices - ncp1 <- newColPos1[j] - ncp1p <- newColPos1[j + 1L] - cond1 <- ncp1 < ncp1p - if (cond1) { - vv1 <- ncp1:(ncp1p - 1L) - iss1vv <- iss1[vv1] - } - - # Use the indices from the single matrix. - u <- sort(iss1vv) - - if (cond1) { - ID1 <- ids[u] - tds <- data.frame(ID1 = ID1, ID2 = ID2) - tds[[relNames[1]]] <- 0 - - if (cond1) { - tds[u %in% iss1vv, relNames[1]] <- x1[vv1] - } - if (drop_upper_triangular == TRUE) { - tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle - } - - # Write the batch to disk or accumulate in the data frame. - if (nrow(tds) > 0) { - if (writetodisk == TRUE) { - write_buffer[[length(write_buffer) + 1]] <- tds - - if (length(write_buffer) >= write_buffer_size) { # Write in batches - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) - write_buffer <- list() - } - } else { - df_relpairs <- rbind(df_relpairs, tds) - } - } - } - if (verbose && (j %% update_rate == 0L)) { - cat("Done with", j, "of", nc, "\n") - } - } - } else { - stop("No matrices provided") - } - - # If not writing to disk, return the accumulated data frame. - if (writetodisk == FALSE) { - return(df_relpairs) - } else { - # Write any remaining buffered rows. - if (length(write_buffer) > 0) { - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) - } - # return(NULL) - } - } else if (legacy) { - # --- Legacy Mode --- - # In legacy mode, convert matrices to the expected symmetric formats. - com2links.og( - rel_pairs_file = rel_pairs_file, - ad_ped_matrix = ad_ped_matrix, - mit_ped_matrix = mit_ped_matrix, - cn_ped_matrix = cn_ped_matrix, - update_rate = update_rate, - verbose = verbose, - outcome_name = outcome_name - ) - return(NULL) - } - - # --- End of Legacy Mode --- - - # Merge and write the parentage matrices - # df <- full_join(mat_ped_matrix %>% arrange(ID), pat_ped_matrix %>% arrange(ID)) - - # write.table(df, file = mapa_id_file, sep = ",", append = FALSE, row.names = FALSE) -} - -#' Convert Pedigree Matrices to Related Pairs File (Legacy) -#' @description -#' This legacy function converts pedigree matrices into a related pairs file. -#' @inheritParams com2links -#' @keywords internal - - -com2links.og <- function( - rel_pairs_file = "dataRelatedPairs.csv", - ad_ped_matrix = NULL, - mit_ped_matrix = mt_ped_matrix, - mt_ped_matrix = NULL, - cn_ped_matrix = NULL, - update_rate = 500, - verbose = FALSE, - outcome_name = "data", - ...) { - # --- Legacy Mode --- - if (verbose) { - message("Using legacy mode") - } - # In legacy mode, convert matrices to the expected symmetric formats. - - # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata')) - # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix") - # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata')) - # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata')) - - # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv') - - biggestMtPed <- mit_ped_matrix - remove(mit_ped_matrix) - biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix") - remove(cn_ped_matrix) - biggestPed <- ad_ped_matrix - remove(ad_ped_matrix) - biggestMtPed@x[biggestMtPed@x > 0] <- 1 - - # Set the output file name. - if (exists("rel_pairs_file")) { - fname <- rel_pairs_file - } else { - fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv") - } - # Initialize the output file with headers. - ds <- data.frame( - ID1 = numeric(0), ID2 = numeric(0), - addRel = numeric(0), - mitRel = numeric(0), cnuRel = numeric(0) - ) - - utils::write.table(ds, - file = fname, sep = ",", - append = FALSE, row.names = FALSE - ) - - # Extract IDs from the common nuclear matrix. - ids <- as.numeric(dimnames(biggestCnPed)[[1]]) - - # Extract pointers from the legacy matrices. - newColPos1 <- biggestPed@p + 1L - iss1 <- biggestPed@i + 1L - newColPos2 <- biggestMtPed@p + 1L - iss2 <- biggestMtPed@i + 1L - newColPos3 <- biggestCnPed@p + 1L - iss3 <- biggestCnPed@i + 1L - nc <- ncol(biggestPed) - - # Process each individual. - for (j in 1L:nc) { - ID2 <- ids[j] - ncp1 <- newColPos1[j] - ncp1p <- newColPos1[j + 1L] - cond1 <- ncp1 < ncp1p - if (cond1) { - vv1 <- ncp1:(ncp1p - 1L) - iss1vv <- iss1[vv1] - } - ncp2 <- newColPos2[j] - ncp2p <- newColPos2[j + 1L] - cond2 <- ncp2 < ncp2p - if (cond2) { - vv2 <- ncp2:(ncp2p - 1L) - iss2vv <- iss2[vv2] - } - ncp3 <- newColPos3[j] - ncp3p <- newColPos3[j + 1L] - cond3 <- ncp3 < ncp3p - if (cond3) { - vv3 <- ncp3:(ncp3p - 1L) - iss3vv <- iss3[vv3] - } - - # Merge indices from all three matrices. - u <- sort(igraph::union(igraph::union(if (cond1) { - iss1vv - }, if (cond2) { - iss2vv - }), if (cond3) { - iss3vv - })) - # browser() - if (cond1 || cond2 || cond3) { - ID1 <- ids[u] - tds <- data.frame( - ID1 = ID1, ID2 = ID2, - addRel = 0, mitRel = 0, cnuRel = 0 - ) - if (cond1) { - tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1] - } - if (cond2) { - tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2] - } - if (cond3) { - tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3] - } - utils::write.table(tds, - file = fname, row.names = FALSE, - col.names = FALSE, append = TRUE, sep = "," - ) - } - if (!(j %% update_rate)) { - cat(paste0("Done with ", j, " of ", nc, "\n")) - } - } - return(NULL) -} - process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) { # Extract pointers and indices from the matrix. @@ -1149,11 +523,12 @@ process_two <- function( #' #' @return The validated and converted matrix. validate_and_convert_matrix <- function(mat, name, ensure_symmetric = FALSE, force_binary = FALSE) { - if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix"))) { - stop(paste0("The '", name, "' must be a matrix or dgCMatrix.")) + if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix","generalMatrix", + "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix"))) { + stop(paste0("The '", name, "' must be a matrix or generalMatrix")) } - if (!inherits(mat, "dgCMatrix")) { - mat <- methods::as(mat, if (ensure_symmetric) "symmetricMatrix" else "dgCMatrix") + if (!inherits(mat, "generalMatrix")) { + mat <- methods::as(mat, if (ensure_symmetric) "symmetricMatrix" else "generalMatrix") } if (force_binary) { mat@x[mat@x > 0] <- 1 diff --git a/R/makeLinkslegacy.R b/R/makeLinkslegacy.R new file mode 100644 index 00000000..27a6e45a --- /dev/null +++ b/R/makeLinkslegacy.R @@ -0,0 +1,631 @@ +#' Convert Sparse Relationship Matrices to Kinship Links +#' @inheritParams com2links +#' @keywords internal + + +com2links.legacy <- function( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + # pat_ped_matrix = NULL, + # mat_ped_matrix = NULL, + # mapa_id_file = "data_mapaID.csv", + write_buffer_size = 1000, + update_rate = 1000, + gc = TRUE, + writetodisk = TRUE, + verbose = FALSE, + legacy = FALSE, + outcome_name = "data", + drop_upper_triangular = TRUE, + ...) { + # Non-legacy mode processing + + if (!legacy) { + # --- Input Validations and Preprocessing --- + + # Ensure that at least one relationship matrix is provided. + if (is.null(ad_ped_matrix) && is.null(mit_ped_matrix) && is.null(cn_ped_matrix)) { + stop("At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided.") + } + # Validate and convert ad_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(ad_ped_matrix)) { + ad_ped_matrix <- validate_and_convert_matrix( + mat = ad_ped_matrix, + name = "ad_ped_matrix" + ) + } + + # Validate and convert cn_ped_matrix to a sparse dgCMatrix if provided. + if (!is.null(cn_ped_matrix)) { + cn_ped_matrix <- validate_and_convert_matrix( + mat = cn_ped_matrix, + name = "cn_ped_matrix", + ensure_symmetric = TRUE + ) + } + + # Validate and process mit_ped_matrix: convert and ensure binary values. + if (!is.null(mit_ped_matrix)) { + mit_ped_matrix <- validate_and_convert_matrix( + mat = mit_ped_matrix, + name = "mit_ped_matrix", force_binary = TRUE, + ensure_symmetric = TRUE + ) + } + + # --- Build IDs and Prepare Matrix Pointers --- + + # Extract individual IDs from the first available matrix. + ids <- NULL + + + + if (!is.null(cn_ped_matrix)) { + ids <- as.numeric(dimnames(cn_ped_matrix)[[1]]) + nc <- ncol(cn_ped_matrix) + } else if (!is.null(ad_ped_matrix)) { + ids <- as.numeric(dimnames(ad_ped_matrix)[[1]]) + nc <- ncol(ad_ped_matrix) + } else if (!is.null(mit_ped_matrix)) { + ids <- as.numeric(dimnames(mit_ped_matrix)[[1]]) + nc <- ncol(mit_ped_matrix) + } + + if (is.null(ids)) { + stop("Could not extract IDs from the provided matrices.") + } + + + # Construct case identifier + matrix_case <- paste(sort(c( + if (!is.null(ad_ped_matrix)) "ad" else NULL, + if (!is.null(mit_ped_matrix)) "mt" else NULL, + if (!is.null(cn_ped_matrix)) "cn" else NULL + )), collapse = "-") + + + # Count how many matrices are provided. + sum_nulls <- sum(!is.null(ad_ped_matrix), + !is.null(mit_ped_matrix), + !is.null(cn_ped_matrix), + na.rm = TRUE + ) + if (verbose) { + print(matrix_case) + } + + # Extract the internal pointers (p, i, and x slots) for each provided matrix. + if (!is.null(ad_ped_matrix)) { + ad_ped_p <- ad_ped_matrix@p + 1L + ad_ped_i <- ad_ped_matrix@i + 1L + ad_ped_x <- ad_ped_matrix@x + } + if (!is.null(mit_ped_matrix)) { + mt_p <- mit_ped_matrix@p + 1L + mt_i <- mit_ped_matrix@i + 1L + mt_x <- mit_ped_matrix@x + } + if (!is.null(cn_ped_matrix)) { + cn_p <- cn_ped_matrix@p + 1L + cn_i <- cn_ped_matrix@i + 1L + cn_x <- cn_ped_matrix@x + } + + # --- Process Based on the Number of Provided Matrices --- + # --- Case: All Three Matrices Provided --- + if (sum_nulls == 3) { + # Set pointers for all three matrices. + newColPos1 <- ad_ped_p + iss1 <- ad_ped_i + x1 <- ad_ped_x + + newColPos2 <- mt_p + iss2 <- mt_i + x2 <- mt_x + + newColPos3 <- cn_p + iss3 <- cn_i + x3 <- cn_x + + # Define relationship column names. + relNames <- c("addRel", "mitRel", "cnuRel") + + # Optionally remove the original pointers to free memory. + if (gc == TRUE) { + remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x, cn_p, cn_i, cn_x) + } + if (verbose) { + message("All 3 matrix is present") + } + + # File names + # rel_pairs_file <- paste0(outcome_name, "_dataRelatedPairs.csv") + # mapa_id_file <- paste0(outcome_name, "_data_mapaID.csv") + + # Initialize the related pairs file with headers. + df_relpairs <- initialize_empty_df(relNames = relNames) + + # Write the headers to the related pairs file. + if (writetodisk == TRUE) { + utils::write.table( + df_relpairs, + file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE + ) + + # Prepare an empty buffer for batching writes. + write_buffer <- list() + remove(df_relpairs) + } + + # Loop over each column (individual) in the matrix. + for (j in 1L:nc) { + ID2 <- ids[j] + + # Extract column indices for the 1st component + ncp1 <- newColPos1[j] + ncp1p <- newColPos1[j + 1L] + cond1 <- ncp1 < ncp1p + if (cond1) { + vv1 <- ncp1:(ncp1p - 1L) + iss1vv <- iss1[vv1] + } + # Extract indices for the 2nd component + ncp2 <- newColPos2[j] + ncp2p <- newColPos2[j + 1L] + cond2 <- ncp2 < ncp2p + if (cond2) { + vv2 <- ncp2:(ncp2p - 1L) + iss2vv <- iss2[vv2] + } + + # Extract indices for the 3rd component + ncp3 <- newColPos3[j] + ncp3p <- newColPos3[j + 1L] + cond3 <- ncp3 < ncp3p + if (cond3) { + vv3 <- ncp3:(ncp3p - 1L) + iss3vv <- iss3[vv3] + } + + # Create a unique, sorted set of row indices from all provided matrices. + u <- sort(igraph::union(igraph::union(if (cond1) { + iss1vv + }, if (cond2) { + iss2vv + }), if (cond3) { + iss3vv + })) + + # If any relationships exist for this individual, build the related pairs. + if (cond1 || cond2 || cond3) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[relNames[1]]] <- 0 + tds[[relNames[2]]] <- 0 + tds[[relNames[3]]] <- 0 + + # Assign the relationship coefficients from each matrix. + if (cond1) { + tds[u %in% iss1vv, relNames[1]] <- x1[vv1] + } + if (cond2) { + tds[u %in% iss2vv, relNames[2]] <- x2[vv2] + } + if (cond3) { + tds[u %in% iss3vv, relNames[3]] <- x3[vv3] + } + + # Optionally drop upper-triangular entries. + if (drop_upper_triangular == TRUE) { + tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle + } + + # Write the batch to disk or accumulate in the data frame. + if (nrow(tds) > 0) { + if (writetodisk == TRUE) { + write_buffer[[length(write_buffer) + 1]] <- tds + + if (length(write_buffer) >= write_buffer_size) { # Write in batches + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + } else if (sum_nulls == 2) { + # --- Case: Two Matrices Provided --- + # Set pointers and relationship names based on which matrix is missing. + + if (is.null(ad_ped_matrix)) { + newColPos1 <- mt_p + iss1 <- mt_i + x1 <- mt_x + newColPos2 <- cn_p + iss2 <- cn_i + x2 <- cn_x + relNames <- c("mitRel", "cnuRel") + if (gc == TRUE) { + remove(mt_p, mt_i, mt_x, cn_p, cn_i, cn_x) + } + } + if (is.null(mit_ped_matrix)) { + newColPos1 <- ad_ped_p + iss1 <- ad_ped_i + x1 <- ad_ped_x + newColPos2 <- cn_p + iss2 <- cn_i + x2 <- cn_x + relNames <- c("addRel", "cnuRel") + if (gc == TRUE) { + remove(ad_ped_p, ad_ped_i, ad_ped_x, cn_p, cn_i, cn_x) + } + } + if (is.null(cn_ped_matrix)) { + newColPos1 <- ad_ped_p + iss1 <- ad_ped_i + x1 <- ad_ped_x + newColPos2 <- mt_p + iss2 <- mt_i + x2 <- mt_x + relNames <- c("addRel", "mitRel") + if (gc == TRUE) { + remove(ad_ped_p, ad_ped_i, ad_ped_x, mt_p, mt_i, mt_x) + } + } + + # Initialize the related pairs file with the appropriate headers. + df_relpairs <- initialize_empty_df(relNames = relNames) + + if (writetodisk == TRUE) { + utils::write.table( + df_relpairs, + file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE + ) + # initial buffer + write_buffer <- list() + remove(df_relpairs) + } + + # Process each column to extract relationships. + for (j in 1L:nc) { + ID2 <- ids[j] + + # Extract indices from the first matrix. + ncp1 <- newColPos1[j] + ncp1p <- newColPos1[j + 1L] + cond1 <- ncp1 < ncp1p + if (cond1) { + vv1 <- ncp1:(ncp1p - 1L) + iss1vv <- iss1[vv1] + } + # Extract indices from the second matrix. + ncp2 <- newColPos2[j] + ncp2p <- newColPos2[j + 1L] + cond2 <- ncp2 < ncp2p + if (cond2) { + vv2 <- ncp2:(ncp2p - 1L) + iss2vv <- iss2[vv2] + } + + # Merge the indices from both matrices. + u <- sort(igraph::union(if (cond1) { + iss1vv + }, if (cond2) { + iss2vv + })) + + # Create related pairs if relationships are found. + if (cond1 || cond2) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[relNames[1]]] <- 0 + tds[[relNames[2]]] <- 0 + + if (cond1) { + tds[u %in% iss1vv, relNames[1]] <- x1[vv1] + } + if (cond2) { + tds[u %in% iss2vv, relNames[2]] <- x2[vv2] + } + if (drop_upper_triangular == TRUE) { + tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle + } + + # Write the batch to disk or accumulate in the data frame. + if (nrow(tds) > 0) { + if (writetodisk == TRUE) { + write_buffer[[length(write_buffer) + 1]] <- tds + + if (length(write_buffer) >= write_buffer_size) { # Write in batches + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + } else if (sum_nulls == 1) { + # --- Case: Only One Matrix Provided --- + if (verbose) { + message("Only one matrix is present") + } + if (!is.null(ad_ped_matrix)) { + newColPos1 <- ad_ped_p + iss1 <- ad_ped_i + x1 <- ad_ped_x + relNames <- c("addRel") + if (gc == TRUE) { + remove(ad_ped_p, ad_ped_i, ad_ped_x) + } + } + if (!is.null(mit_ped_matrix)) { + newColPos1 <- mt_p + iss1 <- mt_i + x1 <- mt_x + relNames <- c("mitRel") + if (gc == TRUE) { + remove(mt_p, mt_i, mt_x) + } + } + if (!is.null(cn_ped_matrix)) { + newColPos1 <- cn_p + iss1 <- cn_i + x1 <- cn_x + relNames <- c("cnuRel") + if (gc == TRUE) { + remove(cn_p, cn_i, cn_x) + } + } + + # Initialize the related pairs file. + df_relpairs <- initialize_empty_df(relNames = relNames) + + if (writetodisk == TRUE) { + utils::write.table( + df_relpairs, + file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE + ) + + # initial buffer + write_buffer <- list() + + remove(df_relpairs) + } + + # Process each column. + for (j in 1L:nc) { + ID2 <- ids[j] + # Extract column indices + ncp1 <- newColPos1[j] + ncp1p <- newColPos1[j + 1L] + cond1 <- ncp1 < ncp1p + if (cond1) { + vv1 <- ncp1:(ncp1p - 1L) + iss1vv <- iss1[vv1] + } + + # Use the indices from the single matrix. + u <- sort(iss1vv) + + if (cond1) { + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[relNames[1]]] <- 0 + + if (cond1) { + tds[u %in% iss1vv, relNames[1]] <- x1[vv1] + } + if (drop_upper_triangular == TRUE) { + tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle + } + + # Write the batch to disk or accumulate in the data frame. + if (nrow(tds) > 0) { + if (writetodisk == TRUE) { + write_buffer[[length(write_buffer) + 1]] <- tds + + if (length(write_buffer) >= write_buffer_size) { # Write in batches + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) + } + } + } + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } + } + } else { + stop("No matrices provided") + } + + # If not writing to disk, return the accumulated data frame. + if (writetodisk == FALSE) { + return(df_relpairs) + } else { + # Write any remaining buffered rows. + if (length(write_buffer) > 0) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + } + # return(NULL) + } + } else if (legacy) { + # --- Legacy Mode --- + # In legacy mode, convert matrices to the expected symmetric formats. + com2links.og( + rel_pairs_file = rel_pairs_file, + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + update_rate = update_rate, + verbose = verbose, + outcome_name = outcome_name + ) + return(NULL) + } + + # --- End of Legacy Mode --- + + # Merge and write the parentage matrices + # df <- full_join(mat_ped_matrix %>% arrange(ID), pat_ped_matrix %>% arrange(ID)) + + # write.table(df, file = mapa_id_file, sep = ",", append = FALSE, row.names = FALSE) +} + +#' Convert Pedigree Matrices to Related Pairs File (Legacy) +#' @description +#' This legacy function converts pedigree matrices into a related pairs file. +#' @inheritParams com2links +#' @keywords internal + + +com2links.og <- function( + rel_pairs_file = "dataRelatedPairs.csv", + ad_ped_matrix = NULL, + mit_ped_matrix = mt_ped_matrix, + mt_ped_matrix = NULL, + cn_ped_matrix = NULL, + update_rate = 500, + verbose = FALSE, + outcome_name = "data", + ...) { + # --- Legacy Mode --- + if (verbose) { + message("Using legacy mode") + } + # In legacy mode, convert matrices to the expected symmetric formats. + + # load(paste0(outcome_name,'_dataBiggestCnPedigree.Rdata')) + # biggestCnPed <- methods::as(biggestCnPed, "symmetricMatrix") + # load(paste0(outcome_name,'_dataBiggestPedigree.Rdata')) + # load(paste0(outcome_name,'_dataBiggestMtPedigree.Rdata')) + + # rel_pairs_file <- paste0(outcome_name,'_datacnmitBiggestRelatedPairsTake3.csv') + + biggestMtPed <- mit_ped_matrix + remove(mit_ped_matrix) + biggestCnPed <- methods::as(cn_ped_matrix, "symmetricMatrix") + remove(cn_ped_matrix) + biggestPed <- ad_ped_matrix + remove(ad_ped_matrix) + biggestMtPed@x[biggestMtPed@x > 0] <- 1 + + # Set the output file name. + if (exists("rel_pairs_file")) { + fname <- rel_pairs_file + } else { + fname <- paste0(outcome_name, "_dataBiggestRelatedPairsTake2.csv") + } + # Initialize the output file with headers. + ds <- data.frame( + ID1 = numeric(0), ID2 = numeric(0), + addRel = numeric(0), + mitRel = numeric(0), cnuRel = numeric(0) + ) + + utils::write.table(ds, + file = fname, sep = ",", + append = FALSE, row.names = FALSE + ) + + # Extract IDs from the common nuclear matrix. + ids <- as.numeric(dimnames(biggestCnPed)[[1]]) + + # Extract pointers from the legacy matrices. + newColPos1 <- biggestPed@p + 1L + iss1 <- biggestPed@i + 1L + newColPos2 <- biggestMtPed@p + 1L + iss2 <- biggestMtPed@i + 1L + newColPos3 <- biggestCnPed@p + 1L + iss3 <- biggestCnPed@i + 1L + nc <- ncol(biggestPed) + + # Process each individual. + for (j in 1L:nc) { + ID2 <- ids[j] + ncp1 <- newColPos1[j] + ncp1p <- newColPos1[j + 1L] + cond1 <- ncp1 < ncp1p + if (cond1) { + vv1 <- ncp1:(ncp1p - 1L) + iss1vv <- iss1[vv1] + } + ncp2 <- newColPos2[j] + ncp2p <- newColPos2[j + 1L] + cond2 <- ncp2 < ncp2p + if (cond2) { + vv2 <- ncp2:(ncp2p - 1L) + iss2vv <- iss2[vv2] + } + ncp3 <- newColPos3[j] + ncp3p <- newColPos3[j + 1L] + cond3 <- ncp3 < ncp3p + if (cond3) { + vv3 <- ncp3:(ncp3p - 1L) + iss3vv <- iss3[vv3] + } + + # Merge indices from all three matrices. + u <- sort(igraph::union(igraph::union(if (cond1) { + iss1vv + }, if (cond2) { + iss2vv + }), if (cond3) { + iss3vv + })) + # browser() + if (cond1 || cond2 || cond3) { + ID1 <- ids[u] + tds <- data.frame( + ID1 = ID1, ID2 = ID2, + addRel = 0, mitRel = 0, cnuRel = 0 + ) + if (cond1) { + tds$addRel[u %in% iss1vv] <- biggestPed@x[vv1] + } + if (cond2) { + tds$mitRel[u %in% iss2vv] <- biggestMtPed@x[vv2] + } + if (cond3) { + tds$cnuRel[u %in% iss3vv] <- biggestCnPed@x[vv3] + } + utils::write.table(tds, + file = fname, row.names = FALSE, + col.names = FALSE, append = TRUE, sep = "," + ) + } + if (!(j %% update_rate)) { + cat(paste0("Done with ", j, " of ", nc, "\n")) + } + } + return(NULL) +} + diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index 51bdcb49..52cc13c1 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -7,10 +7,9 @@ test_that("com2links handles missing matrices properly", { - test_that("com2links rejects invalid matrix types", { fake_matrix <- data.frame(A = c(1, 2), B = c(3, 4)) - expect_error(com2links(ad_ped_matrix = fake_matrix), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.") + expect_error(com2links(ad_ped_matrix = fake_matrix), "The 'ad_ped_matrix' must be a matrix or generalMatrix") }) test_that("com2links produces correct output with a single relationship matrix (hazard dataset)", { @@ -27,12 +26,12 @@ test_that("com2links produces correct output with a single relationship matrix ( test_that("com2links produces correct output with cn_ped_matrix", { data(ASOIAF) - cn_ped_matrix <- ped2mit(ASOIAF, sparse = TRUE) + cn_ped_matrix <- ped2cn(ASOIAF, sparse = TRUE) result <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) expect_true(is.data.frame(result)) - expect_true(all(c("ID1", "ID2", "cnRel") %in% colnames(result))) + expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result))) expect_equal(ncol(result), 3) # Expect ID1, ID2, and addRel expect_true(all(result$cnRel >= 0)) # Relatedness values should be non-negative }) @@ -65,7 +64,47 @@ test_that("com2links processes multiple matrices correctly (hazard dataset)", { expect_true(all(result$cnuRel >= 0)) }) +test_that("com2links processes creates same length for cn with 3, 2, and 1 matrices are used", { + data(hazard) + ad_ped_matrix <- ped2add(hazard, sparse = TRUE) + mit_ped_matrix <- ped2mit(hazard, sparse = TRUE) + cn_ped_matrix <- ped2cn(hazard, sparse = TRUE) + + result3 <- com2links(ad_ped_matrix = ad_ped_matrix, mit_ped_matrix = mit_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) + + expect_true(is.data.frame(result3)) + expect_true(all(c("ID1", "ID2", "addRel", "mitRel", "cnuRel") %in% colnames(result3))) + expect_equal(ncol(result3), 5) # Expect ID1, ID2, addRel, mitRel, and cnuRel + expect_true(all(result3$addRel >= 0)) + expect_true(all(result3$mitRel %in% c(0, 1))) # Mitochondrial should be binary + expect_true(all(result3$cnuRel >= 0)) + + result2 <- com2links(ad_ped_matrix = ad_ped_matrix, cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) + expect_true(is.data.frame(result2)) + expect_true(all(c("ID1", "ID2", "addRel", "cnuRel") %in% colnames(result2))) + expect_equal(ncol(result2), 4) # Expect ID1, ID2, addRel, and cnuRel + expect_true(all(result2$addRel >= 0)) + expect_true(all(result2$cnuRel >= 0)) + + expect_equal(result3$cnuRel,result2$cnuRel) + + result1 <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) + result1_legacy <- com2links.legacy(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) + expect_true(is.data.frame(result1)) + expect_true(is.data.frame(result1_legacy)) + expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result1))) + expect_true(all(c("ID1", "ID2", "cnuRel") %in% colnames(result1_legacy))) + expect_equal(ncol(result1), 3) # Expect ID1, ID2, and cnuRel + expect_equal(ncol(result1_legacy), 3) # Expect ID1, ID2, and cnuRel + expect_true(all(result1$cnuRel >= 0)) + expect_true(all(result1_legacy$cnuRel >= 0)) + expect_equal(result3$cnuRel,result1$cnuRel) + expect_equal(result3$cnuRel,result1_legacy$cnuRel) + expect_equal(result2$cnuRel,result1$cnuRel) + expect_equal(result2$cnuRel,result1_legacy$cnuRel) + expect_equal(result1$cnuRel,result1_legacy$cnuRel) +}) test_that("com2links written version matchs", { data(hazard) ad_ped_matrix <- ped2com(hazard, component = "additive", adjacency_method = "direct", sparse = TRUE) @@ -236,7 +275,7 @@ test_that("com2links correctly handles missing matrices", { "At least one of 'ad_ped_matrix', 'mit_ped_matrix', or 'cn_ped_matrix' must be provided." ) - expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or dgCMatrix.") + expect_error(com2links(ad_ped_matrix = hazard), "The 'ad_ped_matrix' must be a matrix or generalMatrix") }) test_that("com2links correctly processes inbreeding dataset", { @@ -281,14 +320,16 @@ test_that("com2links handles large batch writing correctly", { sexR <- 0.5 df_fam <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) - ad_ped_matrix <- ped2add(df_fam, sparse = TRUE) + cn_ped_matrix <- ped2cn(df_fam, sparse = TRUE) temp_file <- tempfile(fileext = ".csv") - com2links(ad_ped_matrix = ad_ped_matrix, rel_pairs_file = temp_file, writetodisk = TRUE, verbose = TRUE) + com2links(cn_ped_matrix = cn_ped_matrix, rel_pairs_file = temp_file, writetodisk = TRUE, verbose = TRUE) expect_true(file.exists(temp_file)) written_data <- read.csv(temp_file) expect_true(nrow(written_data) > 1000) # Ensuring batch writing logic works + expect_true(file.remove(temp_file)) + }) test_that("com2links garbage collection does not affect output, using two components", { From 8039f67956e256ef2dd282eba34e4b635856246c Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 10 Apr 2025 15:16:44 -0400 Subject: [PATCH 14/35] more tests and exploring option to write out all values --- R/makeLinks.R | 46 ++++++++++++++++++++++++------ man/com2links.Rd | 3 ++ man/com2links.legacy.Rd | 6 ++-- man/com2links.og.Rd | 2 +- man/process_one.Rd | 45 +++++++++++++++++++++++++++++ tests/testthat/test-makeLinks.R | 12 ++++---- tests/testthat/test-plotPedigree.R | 17 +++++++++++ 7 files changed, 112 insertions(+), 19 deletions(-) create mode 100644 man/process_one.Rd diff --git a/R/makeLinks.R b/R/makeLinks.R index 0156717c..46e7d743 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -17,6 +17,7 @@ #' @param legacy Logical. If TRUE, uses the legacy branch of the function. #' @param outcome_name Character string representing the outcome name (used in file naming). #' @param drop_upper_triangular Logical. If TRUE, drops the upper triangular portion of the matrix. +#' @param include_all_links_1ped Logical. If TRUE, includes all links in the output. (Default is true when only one ped is provided) #' @param ... Additional arguments to be passed to \code{\link{com2links}} #' #' @return A data frame of related pairs if \code{writetodisk} is FALSE; otherwise, writes the results to disk. @@ -39,6 +40,7 @@ com2links <- function( legacy = FALSE, outcome_name = "data", drop_upper_triangular = TRUE, + include_all_links_1ped=FALSE, ...) { # --- Input Validations and Preprocessing --- @@ -118,6 +120,7 @@ com2links <- function( update_rate = update_rate, verbose = verbose, gc = gc, + include_all_links = include_all_links_1ped, ... ), "mt" = process_one( @@ -132,6 +135,7 @@ com2links <- function( update_rate = update_rate, verbose = verbose, gc = gc, + include_all_links = include_all_links_1ped, ... ), "cn" = process_one( @@ -146,6 +150,7 @@ com2links <- function( update_rate = update_rate, verbose = verbose, gc = gc, + include_all_links = include_all_links_1ped, ... ), "ad-mt" = process_two( @@ -217,13 +222,17 @@ com2links <- function( stop("Unsupported matrix combination") ) } -#' Convert Sparse Relationship Matrices to Kinship Links +#' Convert Sparse Relationship Matrices to Kinship Links for one Matrix #' @inheritParams com2links +#' @param include_all_links Logical. If TRUE, all links are included in the output. #' @keywords internal -process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, ...) { +process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, + write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, + include_all_links=TRUE, ...) { + if (include_all_links == FALSE) { # Extract pointers and indices from the matrix. newColPos <- matrix@p + 1L iss <- matrix@i + 1L @@ -245,7 +254,8 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, # Process each column in the matrix. for (j in 1L:nc) { - ID2 <- ids[j] + + ID2 <- ids[j] # Extract column indices ncp <- newColPos[j] @@ -261,10 +271,10 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, # If any relationships exist for this individual, build the related pairs. if (cond) { - ID1 <- ids[u] - tds <- data.frame(ID1 = ID1, ID2 = ID2) - tds[[rel_name]] <- 0 - + # Create a data frame with unique pairs. + ID1 <- ids[u] + tds <- data.frame(ID1 = ID1, ID2 = ID2) + tds[[rel_name]] <- 0 if (cond) { tds[u %in% issvv, rel_name] <- x[vv] } @@ -307,6 +317,26 @@ process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, } if (gc == TRUE) { remove(newColPos, iss, x) + } + }else{ + matrix2= matrix(rep(1,length(ids)^2), + nrow = length(ids), + dimnames = list(ids, ids)) + process_two(matrix2=matrix, name2=rel_name, + matrix1=methods::as(matrix2,"CsparseMatrix"), + name1="phantom", + ids=ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc) + + + } } @@ -524,7 +554,7 @@ process_two <- function( #' @return The validated and converted matrix. validate_and_convert_matrix <- function(mat, name, ensure_symmetric = FALSE, force_binary = FALSE) { if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix","generalMatrix", - "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix"))) { + "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix",'CsparseMatrix'))) { stop(paste0("The '", name, "' must be a matrix or generalMatrix")) } if (!inherits(mat, "generalMatrix")) { diff --git a/man/com2links.Rd b/man/com2links.Rd index 9dae2f09..dc846645 100644 --- a/man/com2links.Rd +++ b/man/com2links.Rd @@ -18,6 +18,7 @@ com2links( legacy = FALSE, outcome_name = "data", drop_upper_triangular = TRUE, + include_all_links_1ped = FALSE, ... ) } @@ -48,6 +49,8 @@ com2links( \item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.} +\item{include_all_links_1ped}{Logical. If TRUE, includes all links in the output. (Default is true when only one ped is provided)} + \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } \value{ diff --git a/man/com2links.legacy.Rd b/man/com2links.legacy.Rd index bf4bc6e4..4467dc85 100644 --- a/man/com2links.legacy.Rd +++ b/man/com2links.legacy.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/makeLinks.R +% Please edit documentation in R/makeLinkslegacy.R \name{com2links.legacy} \alias{com2links.legacy} \title{Convert Sparse Relationship Matrices to Kinship Links} @@ -51,8 +51,6 @@ com2links.legacy( \item{...}{Additional arguments to be passed to \code{\link{com2links}}} } \description{ -This function processes one or more sparse relationship components (additive, mitochondrial, -and common nuclear) and converts them into kinship link pairs. The resulting related pairs are -either returned as a data frame or written to disk in CSV format. +Convert Sparse Relationship Matrices to Kinship Links } \keyword{internal} diff --git a/man/com2links.og.Rd b/man/com2links.og.Rd index b6c3d71b..e51c53cd 100644 --- a/man/com2links.og.Rd +++ b/man/com2links.og.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/makeLinks.R +% Please edit documentation in R/makeLinkslegacy.R \name{com2links.og} \alias{com2links.og} \title{Convert Pedigree Matrices to Related Pairs File (Legacy)} diff --git a/man/process_one.Rd b/man/process_one.Rd new file mode 100644 index 00000000..0d57de2b --- /dev/null +++ b/man/process_one.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/makeLinks.R +\name{process_one} +\alias{process_one} +\title{Convert Sparse Relationship Matrices to Kinship Links for one Matrix} +\usage{ +process_one( + matrix, + rel_name, + ids, + nc, + rel_pairs_file, + writetodisk, + write_buffer_size, + drop_upper_triangular, + update_rate, + verbose, + gc, + include_all_links = TRUE, + ... +) +} +\arguments{ +\item{rel_pairs_file}{File path to write related pairs to (CSV format).} + +\item{writetodisk}{Logical. If TRUE, writes the related pairs to disk; if FALSE, returns a data frame.} + +\item{write_buffer_size}{Number of related pairs to write to disk at a time.} + +\item{drop_upper_triangular}{Logical. If TRUE, drops the upper triangular portion of the matrix.} + +\item{update_rate}{Numeric. Frequency (in iterations) at which progress messages are printed.} + +\item{verbose}{Logical. If TRUE, prints progress messages.} + +\item{gc}{Logical. If TRUE, performs garbage collection via \code{\link{gc}} to free memory.} + +\item{include_all_links}{Logical. If TRUE, all links are included in the output.} + +\item{...}{Additional arguments to be passed to \code{\link{com2links}}} +} +\description{ +Convert Sparse Relationship Matrices to Kinship Links for one Matrix +} +\keyword{internal} diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index 52cc13c1..e23b2390 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -98,11 +98,11 @@ test_that("com2links processes creates same length for cn with 3, 2, and 1 matri expect_equal(ncol(result1_legacy), 3) # Expect ID1, ID2, and cnuRel expect_true(all(result1$cnuRel >= 0)) expect_true(all(result1_legacy$cnuRel >= 0)) - expect_equal(result3$cnuRel,result1$cnuRel) - expect_equal(result3$cnuRel,result1_legacy$cnuRel) - expect_equal(result2$cnuRel,result1$cnuRel) - expect_equal(result2$cnuRel,result1_legacy$cnuRel) - expect_equal(result1$cnuRel,result1_legacy$cnuRel) + expect_equal(result3$cnuRel[result3$cnuRel==1],result1$cnuRel[result1$cnuRel==1]) + expect_equal(result3$cnuRel[result3$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) + expect_equal(result2$cnuRel[result2$cnuRel==1],result1$cnuRel[result1$cnuRel==1]) + expect_equal(result2$cnuRel[result2$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) + expect_equal(result1$cnuRel[result1$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) }) test_that("com2links written version matchs", { @@ -327,7 +327,7 @@ test_that("com2links handles large batch writing correctly", { expect_true(file.exists(temp_file)) written_data <- read.csv(temp_file) - expect_true(nrow(written_data) > 1000) # Ensuring batch writing logic works + expect_true(nrow(written_data) == 155) # Ensuring batch writing logic works expect_true(file.remove(temp_file)) }) diff --git a/tests/testthat/test-plotPedigree.R b/tests/testthat/test-plotPedigree.R index 4944bcdb..2c5199d7 100644 --- a/tests/testthat/test-plotPedigree.R +++ b/tests/testthat/test-plotPedigree.R @@ -32,3 +32,20 @@ test_that("pedigree plots correctly with affected variables", { # file.remove("Rplots.pdf") }) # file.remove("Rplots.pdf") + +test_that("pedigree errs when affected variables named", { +data(inbreeding) + + expect_error(plotPedigree(data, verbose = TRUE, affected = "affected")) + + +}) + + +test_that("pedigree plots multiple families", { + data(inbreeding) + + expect_output(plotPedigree(inbreeding, verbose = TRUE)) + + +}) From 728d56030249f8497eecb66c38a95df7d63c3261 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 11 Apr 2025 09:20:00 -0400 Subject: [PATCH 15/35] additional aliases --- NAMESPACE | 4 ++++ NEWS.md | 1 + R/calculateFamilySize.R | 23 +++++++++++++------ R/helpGeneric.R | 14 +++++++---- R/helpPedigree.R | 6 ++++- R/insertEven.R | 6 ++++- R/simulatePedigree.R | 14 ++++------- R/tweakPedigree.R | 2 +- man/assignCoupleIds.Rd | 5 +++- man/{allGens.Rd => calcAllGens.Rd} | 5 +++- man/{famSizeCal.Rd => calcFamilySize.Rd} | 5 +++- ...{sizeAllGens.Rd => calcFamilySizeByGen.Rd} | 5 +++- man/{evenInsert.Rd => insertEven.Rd} | 5 +++- man/{nullToNA.Rd => null2NA.Rd} | 5 +++- man/{try_na.Rd => tryNA.Rd} | 5 +++- 15 files changed, 75 insertions(+), 30 deletions(-) rename man/{allGens.Rd => calcAllGens.Rd} (91%) rename man/{famSizeCal.Rd => calcFamilySize.Rd} (90%) rename man/{sizeAllGens.Rd => calcFamilySizeByGen.Rd} (86%) rename man/{evenInsert.Rd => insertEven.Rd} (93%) rename man/{nullToNA.Rd => null2NA.Rd} (86%) rename man/{try_na.Rd => tryNA.Rd} (89%) diff --git a/NAMESPACE b/NAMESPACE index 47496f84..972282d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ export(SimPed) export(allGens) +export(calcAllGens) +export(calcFamilySize) export(calculateRelatedness) export(checkIDs) export(checkParentIDs) @@ -17,6 +19,7 @@ export(famSizeCal) export(fitComponentModel) export(identifyComponentModel) export(inferRelatedness) +export(insertEven) export(makeInbreeding) export(makeTwins) export(parseTree) @@ -38,6 +41,7 @@ export(relatedness) export(repairSex) export(resample) export(simulatePedigree) +export(sizeAllGens) export(summarizeFamilies) export(summarizeMatrilines) export(summarizePatrilines) diff --git a/NEWS.md b/NEWS.md index 94602bd7..fd941969 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions * allow verbose argument to be passed to standardizeColnames * list SimPed and related_coef as aliases for functions +* harmonizing function names like calcFamilySize from famSizeCal # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/calculateFamilySize.R b/R/calculateFamilySize.R index 51365d85..06cc3f16 100644 --- a/R/calculateFamilySize.R +++ b/R/calculateFamilySize.R @@ -5,7 +5,7 @@ #' @param marR Mating rate (numeric value ranging from 0 to 1). #' @return Returns a vector containing the number of individuals in every generation. #' @export -allGens <- function(kpc, Ngen, marR) { +calcAllGens <- function(kpc, Ngen, marR) { # Check if the number of generations is valid if (Ngen < 1) { stop("The number of generations should be an integer greater or equal than 1") @@ -23,14 +23,16 @@ allGens <- function(kpc, Ngen, marR) { } return(allGens) } - +#' @rdname calcAllGens +#' @export +allGens <- calcAllGens #' sizeAllGens #' An internal supporting function for \code{simulatePedigree}. -#' @inheritParams allGens +#' @inheritParams calcAllGens #' @return Returns a vector including the number of individuals in every generation. -sizeAllGens <- function(kpc, Ngen, marR) { +calcFamilySizeByGen <- function(kpc, Ngen, marR) { Nmid <- Ngen - 2 midGens <- numeric(length = Nmid) @@ -46,14 +48,16 @@ sizeAllGens <- function(kpc, Ngen, marR) { # print(allGens) return(allGens) } - +#' @rdname calcFamilySizeByGen +#' @export +sizeAllGens <- calcFamilySizeByGen #' famSizeCal #' A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree} -#' @inheritParams allGens +#' @inheritParams calcAllGens #' @return Returns a numeric value indicating the total pedigree size. #' @export -famSizeCal <- function(kpc, Ngen, marR) { +calcFamilySize <- function(kpc, Ngen, marR) { if (Ngen < 1) { stop("The number of generations should be an integer greater than or equal to 1") } else if (Ngen == 1) { @@ -71,3 +75,8 @@ famSizeCal <- function(kpc, Ngen, marR) { } return(size) } + +#' @rdname calcFamilySize +#' @export +#' +famSizeCal <- calcFamilySize diff --git a/R/helpGeneric.R b/R/helpGeneric.R index 8dae2b2c..4e1d8ca9 100644 --- a/R/helpGeneric.R +++ b/R/helpGeneric.R @@ -24,10 +24,9 @@ rmvn <- function(n, sigma) { #' @param x vector of any length #' @return replaces null values in a vector to NA #' -nullToNA <- function(x) { +null2NA <- function(x) { if (length(x) == 0) { x <- NA - # Handle case when x is a list } else if (is.list(x)) { for (i in seq_along(x)) { @@ -39,6 +38,9 @@ nullToNA <- function(x) { return(x) } +#' @rdname null2NA +#' +nullToNA <- null2NA #' modified tryCatch function #' @@ -46,10 +48,14 @@ nullToNA <- function(x) { #' @keywords internal #' @return Fuses the nullToNA function with efunc #' -try_na <- function(x) { - nullToNA(tryCatch(x, error = efunc)) +tryNA <- function(x) { + null2NA(tryCatch(x, error = efunc)) } +#' @rdname tryNA +#' @keywords internal +#' +try_na <- tryNA #' Compute the null space of a matrix #' #' @param M a matrix of which the null space is desired diff --git a/R/helpPedigree.R b/R/helpPedigree.R index 7b6bb4be..3d054d4b 100644 --- a/R/helpPedigree.R +++ b/R/helpPedigree.R @@ -60,7 +60,7 @@ determineSex <- function(idGen, sexR) { #' #' @param df_Ngen The dataframe for the current generation, including columns for individual IDs and spouse IDs. #' @return The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier. -assignCoupleIds <- function(df_Ngen) { +assignCoupleIDs <- function(df_Ngen) { df_Ngen$coupleId <- NA_character_ # Initialize the coupleId column with NAs usedCoupleIds <- character() # Initialize an empty character vector to track used IDs @@ -86,6 +86,10 @@ assignCoupleIds <- function(df_Ngen) { return(df_Ngen) } + +#' @rdname assignCoupleIDs +assignCoupleIds <- assignCoupleIDs + #' Generate or Adjust Number of Kids per Couple Based on Mating Rate #' #' This function generates or adjusts the number of kids per couple in a generation diff --git a/R/insertEven.R b/R/insertEven.R index ba2d55e9..573bd8c9 100644 --- a/R/insertEven.R +++ b/R/insertEven.R @@ -12,7 +12,7 @@ #' @export #' @seealso \code{\link{SimPed}} for the main function that uses this supporting function. -evenInsert <- function(m, n, verbose = FALSE) { +insertEven <- function(m, n, verbose = FALSE) { if (length(m) > length(n)) { temp <- m m <- n @@ -36,3 +36,7 @@ evenInsert <- function(m, n, verbose = FALSE) { return(vec) } + +#' @rdname insertEven +#' @export +evenInsert <- insertEven diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index 29407af8..8fc88915 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -24,7 +24,6 @@ buildWithinGenerations <- function(sizeGens, marR, sexR, Ngen) { df_Ngen$sex <- determineSex(idGen = idGen, sexR = sexR) - # print(paste("tiger",i)) # The first generation if (i == 1) { @@ -181,11 +180,9 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, # count the number of couples in the i th gen countCouple <- (nrow(df_Ngen) - sum(is.na(df_Ngen$spID))) * .5 - # Now, assign couple IDs for the current generation df_Ngen <- assignCoupleIds(df_Ngen) - # get the number of linked female and male children after excluding the single children # get a vector of single person id in the ith generation IdSingle <- df_Ngen$id[is.na(df_Ngen$spID)] @@ -194,9 +191,11 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, SingleM <- sum(df_Ngen$sex == "M" & is.na(df_Ngen$spID)) CoupleM <- N_LinkedMale - SingleM - df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen, i = i, Ngen = Ngen, sizeGens = sizeGens, CoupleF = CoupleF) - - + df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen, + i = i, + Ngen = Ngen, + sizeGens = sizeGens, + CoupleF = CoupleF) if (verbose) { print( "Step 2.2: mark a group of potential parents in the i-1 th generation" @@ -251,7 +250,6 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, # generate link kids to the couples random_numbers <- adjustKidsPerCouple(nMates = sum(df_Ngen$ifparent) / 2, kpc = kpc, rd_kpc = rd_kpc) - # cat("final random numbers",random_numbers, "\n") # cat("mean",sum(random_numbers)/length(random_numbers), "\n") # create two vectors for maId and paId; replicate the ids to match the same length as IdOfp @@ -391,8 +389,6 @@ simulatePedigree <- function(kpc = 3, sizeGens = sizeGens, verbose = verbose, marR = marR, sexR = sexR, kpc = kpc, rd_kpc = rd_kpc ) - - df_Fam <- df_Fam[, 1:7] df_Fam <- df_Fam[!(is.na(df_Fam$pat) & is.na(df_Fam$mat) & is.na(df_Fam$spID)), ] colnames(df_Fam)[c(2, 4, 5)] <- c("ID", "dadID", "momID") diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R index edefaf99..d617851b 100644 --- a/R/tweakPedigree.R +++ b/R/tweakPedigree.R @@ -255,7 +255,7 @@ dropLink <- function(ped, if (!is.na(ID_drop)) { ped[ped$ID %in% ID_drop, c("dadID", "momID")] <- NA_integer_ } else { - warning("No individual is dropped from his/her parents.") + warning("No individual is dropped from their parents.") } } else { ped[ped$ID == ID_drop, c("dadID", "momID")] <- NA_integer_ diff --git a/man/assignCoupleIds.Rd b/man/assignCoupleIds.Rd index 7f379f64..95165581 100644 --- a/man/assignCoupleIds.Rd +++ b/man/assignCoupleIds.Rd @@ -1,9 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/helpPedigree.R -\name{assignCoupleIds} +\name{assignCoupleIDs} +\alias{assignCoupleIDs} \alias{assignCoupleIds} \title{Assign Couple IDs} \usage{ +assignCoupleIDs(df_Ngen) + assignCoupleIds(df_Ngen) } \arguments{ diff --git a/man/allGens.Rd b/man/calcAllGens.Rd similarity index 91% rename from man/allGens.Rd rename to man/calcAllGens.Rd index 6bc6d9e0..66a89c25 100644 --- a/man/allGens.Rd +++ b/man/calcAllGens.Rd @@ -1,10 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculateFamilySize.R -\name{allGens} +\name{calcAllGens} +\alias{calcAllGens} \alias{allGens} \title{allGens A function to calculate the number of individuals in each generation. This is a supporting function for \code{simulatePedigree}.} \usage{ +calcAllGens(kpc, Ngen, marR) + allGens(kpc, Ngen, marR) } \arguments{ diff --git a/man/famSizeCal.Rd b/man/calcFamilySize.Rd similarity index 90% rename from man/famSizeCal.Rd rename to man/calcFamilySize.Rd index e51f95ee..a0128d0c 100644 --- a/man/famSizeCal.Rd +++ b/man/calcFamilySize.Rd @@ -1,10 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculateFamilySize.R -\name{famSizeCal} +\name{calcFamilySize} +\alias{calcFamilySize} \alias{famSizeCal} \title{famSizeCal A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree}} \usage{ +calcFamilySize(kpc, Ngen, marR) + famSizeCal(kpc, Ngen, marR) } \arguments{ diff --git a/man/sizeAllGens.Rd b/man/calcFamilySizeByGen.Rd similarity index 86% rename from man/sizeAllGens.Rd rename to man/calcFamilySizeByGen.Rd index e477c19e..ae3e5e88 100644 --- a/man/sizeAllGens.Rd +++ b/man/calcFamilySizeByGen.Rd @@ -1,10 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculateFamilySize.R -\name{sizeAllGens} +\name{calcFamilySizeByGen} +\alias{calcFamilySizeByGen} \alias{sizeAllGens} \title{sizeAllGens An internal supporting function for \code{simulatePedigree}.} \usage{ +calcFamilySizeByGen(kpc, Ngen, marR) + sizeAllGens(kpc, Ngen, marR) } \arguments{ diff --git a/man/evenInsert.Rd b/man/insertEven.Rd similarity index 93% rename from man/evenInsert.Rd rename to man/insertEven.Rd index 2dae39c1..7fc08138 100644 --- a/man/evenInsert.Rd +++ b/man/insertEven.Rd @@ -1,10 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/insertEven.R -\name{evenInsert} +\name{insertEven} +\alias{insertEven} \alias{evenInsert} \title{evenInsert A function to insert m elements evenly into a length n vector.} \usage{ +insertEven(m, n, verbose = FALSE) + evenInsert(m, n, verbose = FALSE) } \arguments{ diff --git a/man/nullToNA.Rd b/man/null2NA.Rd similarity index 86% rename from man/nullToNA.Rd rename to man/null2NA.Rd index 4bccb4b3..cb6d7571 100644 --- a/man/nullToNA.Rd +++ b/man/null2NA.Rd @@ -1,9 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/helpGeneric.R -\name{nullToNA} +\name{null2NA} +\alias{null2NA} \alias{nullToNA} \title{nullToNA} \usage{ +null2NA(x) + nullToNA(x) } \arguments{ diff --git a/man/try_na.Rd b/man/tryNA.Rd similarity index 89% rename from man/try_na.Rd rename to man/tryNA.Rd index 0f8fddeb..388fa600 100644 --- a/man/try_na.Rd +++ b/man/tryNA.Rd @@ -1,9 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/helpGeneric.R -\name{try_na} +\name{tryNA} +\alias{tryNA} \alias{try_na} \title{modified tryCatch function} \usage{ +tryNA(x) + try_na(x) } \arguments{ From c48b8aa7027e194589603864a2badac539712b4c Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 11 Apr 2025 09:24:06 -0400 Subject: [PATCH 16/35] remove package messaging --- .gitignore | 1 + vignettes/ASOIAF.Rmd | 5 +- vignettes/ASOIAF.html | 127 +++++++++++++++++------------------------- 3 files changed, 55 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index c2b69572..8faabc57 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ ASOIAF.ged .vscode/launch.json dataRelatedPairs_new2.csv +data-raw/ASOIAF_040725.ged diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd index 0491b015..ddc07ebe 100644 --- a/vignettes/ASOIAF.Rmd +++ b/vignettes/ASOIAF.Rmd @@ -15,11 +15,12 @@ Just how related are Jon Snow and Daenerys Targaryen? This vignette walks throug We begin by loading the necessary packages and accessing the built-in `ASOIAF` pedigree dataset included with `BGmisc`. -```{r} +```{r echo=TRUE, message=FALSE, warning=FALSE} library(BGmisc) library(tidyverse) data(ASOIAF) ``` + The ASOIAF data contains character IDs, family identifiers, and parent-child links extracted from A Song of Ice and Fire lore. ```{r} @@ -127,7 +128,7 @@ This code creates new IDs for individuals with one known parent and a missing ot ## Visualize the Pedigree -```{r} +```{r, message=FALSE, warning=FALSE} #fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA) diff --git a/vignettes/ASOIAF.html b/vignettes/ASOIAF.html index e6f72f60..709281cc 100644 --- a/vignettes/ASOIAF.html +++ b/vignettes/ASOIAF.html @@ -355,36 +355,11 @@

Load Packages and Data

ASOIAF pedigree dataset included with BGmisc.

library(BGmisc)
-library(tidyverse)
-
## ── Attaching core tidyverse packages ────── tidyverse 2.0.0 ──
-## ✔ dplyr     1.1.4     ✔ readr     2.1.5
-## ✔ forcats   1.0.0     ✔ stringr   1.5.1
-## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
-## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
-## ✔ purrr     1.0.4     
-## ── Conflicts ──────────────────────── tidyverse_conflicts() ──
-## ✖ dplyr::between()     masks BGmisc::between()
-## ✖ dplyr::filter()      masks stats::filter()
-## ✖ dplyr::first()       masks BGmisc::first()
-## ✖ lubridate::hour()    masks BGmisc::hour()
-## ✖ lubridate::isoweek() masks BGmisc::isoweek()
-## ✖ dplyr::lag()         masks stats::lag()
-## ✖ dplyr::last()        masks BGmisc::last()
-## ✖ lubridate::mday()    masks BGmisc::mday()
-## ✖ lubridate::minute()  masks BGmisc::minute()
-## ✖ lubridate::month()   masks BGmisc::month()
-## ✖ lubridate::quarter() masks BGmisc::quarter()
-## ✖ lubridate::second()  masks BGmisc::second()
-## ✖ purrr::transpose()   masks BGmisc::transpose()
-## ✖ lubridate::wday()    masks BGmisc::wday()
-## ✖ lubridate::week()    masks BGmisc::week()
-## ✖ lubridate::yday()    masks BGmisc::yday()
-## ✖ lubridate::year()    masks BGmisc::year()
-## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
-
data(ASOIAF)
+library(tidyverse) +data(ASOIAF)

The ASOIAF data contains character IDs, family identifiers, and parent-child links extracted from A Song of Ice and Fire lore.

-
head(ASOIAF)
+
head(ASOIAF)
##   id famID momID dadID          name sex
 ## 1  1     1    NA    NA   Walder Frey   M
 ## 2  2     1    NA    NA   Perra Royce   F
@@ -398,11 +373,11 @@ 

Prepare and Validate Sex Codes

We use checkSex() to ensure that all individuals have valid sex codes, repairing as needed. This is important for correct pedigree plotting and downstream calculations.

-
df_got <- checkSex(ASOIAF,
-  code_male = 1,
-  code_female = 0,
-  verbose = FALSE, repair = TRUE
-)
+
df_got <- checkSex(ASOIAF,
+  code_male = 1,
+  code_female = 0,
+  verbose = FALSE, repair = TRUE
+)

Compute Relatedness Matrices

@@ -416,18 +391,18 @@

Compute Relatedness Matrices

for the common nuclear matrix. The direct method is much faster. The sparse argument is set to FALSE to return dense matrices.

-
add <- ped2com(df_got,
-  isChild_method = "partialparent",
-  component = "additive",
-  adjacency_method = "direct",
-  sparse = FALSE
-)
-
-cn <- ped2cn(df_got,
-  isChild_method = "partialparent",
-  adjacency_method = "indexed",
-  sparse = FALSE
-)
+
add <- ped2com(df_got,
+  isChild_method = "partialparent",
+  component = "additive",
+  adjacency_method = "direct",
+  sparse = FALSE
+)
+
+cn <- ped2cn(df_got,
+  isChild_method = "partialparent",
+  adjacency_method = "indexed",
+  sparse = FALSE
+)

Convert to Pairwise Format

@@ -437,12 +412,12 @@

Convert to Pairwise Format

relatedness. The function can return the entire matrix or just the lower triangular part, which is often sufficient for our purposes. We set writetodisk = FALSE to keep the data in memory.

-
df_links <- com2links(
-  writetodisk = FALSE,
-  ad_ped_matrix = add, cn_ped_matrix = cn,
-  drop_upper_triangular = TRUE
-)# %>%
-#  filter(ID1 != ID2)
+
df_links <- com2links(
+  writetodisk = FALSE,
+  ad_ped_matrix = add, cn_ped_matrix = cn,
+  drop_upper_triangular = TRUE
+)# %>%
+#  filter(ID1 != ID2)

Locate Jon and Daenerys

@@ -451,22 +426,22 @@

Locate Jon and Daenerys

the df_links dataframe where either ID1 or ID2 corresponds to Jon Snow, and then filter again to find the row where the other ID corresponds to Daenerys Targaryen.

-
# Find the IDs of Jon Snow and Daenerys Targaryen
-
-jon_id <- df_got %>%
-  filter(name == "Jon Snow") %>%
-  pull(ID)
-
-dany_id <- df_got %>%
-  filter(name == "Daenerys Targaryen") %>%
-  pull(ID)
+
# Find the IDs of Jon Snow and Daenerys Targaryen
+
+jon_id <- df_got %>%
+  filter(name == "Jon Snow") %>%
+  pull(ID)
+
+dany_id <- df_got %>%
+  filter(name == "Daenerys Targaryen") %>%
+  pull(ID)

We then filter the pairwise table to retrieve the row containing their relationship.

-
jon_dany_row <- df_links %>%
-  filter(ID1 == jon_id | ID2 == jon_id) %>%
-  filter(ID1 %in% dany_id| ID2 %in% dany_id)
-
-jon_dany_row 
+
jon_dany_row <- df_links %>%
+  filter(ID1 == jon_id | ID2 == jon_id) %>%
+  filter(ID1 %in% dany_id| ID2 %in% dany_id)
+
+jon_dany_row 
##   ID1 ID2     addRel cnuRel
 ## 1 206 211 0.31274414      0
 ## 2 211 304 0.01953125      0
@@ -481,14 +456,14 @@

Plotting the Pedigree with incomplete parental information

To facilitate plotting, we check for individuals with one known parent but a missing other. For those cases, we assign a placeholder ID to the missing parent.

-
df_repaired <- checkParentIDs(df_got,addphantoms=TRUE,
-                              repair=TRUE,
-                              parentswithoutrow=FALSE,
-                              repairsex=FALSE
-                              ) %>% mutate(fam=1,
-                                           affected = case_when(ID %in% c(jon_id,dany_id) ~ 1,
-                                                               TRUE ~ 0)
-                              )
+
df_repaired <- checkParentIDs(df_got,addphantoms=TRUE,
+                              repair=TRUE,
+                              parentswithoutrow=FALSE,
+                              repairsex=FALSE
+                              ) %>% mutate(fam=1,
+                                           affected = case_when(ID %in% c(jon_id,dany_id) ~ 1,
+                                                               TRUE ~ 0)
+                              )
## REPAIR IN EARLY ALPHA

This code creates new IDs for individuals with one known parent and a missing other. It checks if either momID or @@ -498,9 +473,9 @@

Plotting the Pedigree with incomplete parental information

Visualize the Pedigree

-
#fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA)
-
-plotPedigree(df_repaired,affected=df_repaired$affected,verbose=FALSE)
+
#fixParents(id=df_got$ID, dadid=df_got$dadID, momid=df_got$momID, sex=df_got$sex, missid = NA)
+
+plotPedigree(df_repaired,affected=df_repaired$affected,verbose=FALSE)

## Did not plot the following people: 85 88 125 142 228 229 258 259 274 275 305 336 357 381 388 405 409 418 420 424 428 451 487
## named list()
From 9641a042e25ae46be36446baaddf076599deec83 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 11 Apr 2025 09:28:45 -0400 Subject: [PATCH 17/35] Delete assignCoupleIds.Rd --- man/assignCoupleIds.Rd | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 man/assignCoupleIds.Rd diff --git a/man/assignCoupleIds.Rd b/man/assignCoupleIds.Rd deleted file mode 100644 index 95165581..00000000 --- a/man/assignCoupleIds.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpPedigree.R -\name{assignCoupleIDs} -\alias{assignCoupleIDs} -\alias{assignCoupleIds} -\title{Assign Couple IDs} -\usage{ -assignCoupleIDs(df_Ngen) - -assignCoupleIds(df_Ngen) -} -\arguments{ -\item{df_Ngen}{The dataframe for the current generation, including columns for individual IDs and spouse IDs.} -} -\value{ -The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier. -} -\description{ -This subfunction assigns a unique couple ID to each mated pair in the generation. -Unmated individuals are assigned NA for their couple ID. -} From 9525061de57fe2111b9cb6ae290bbc7d64a28ccf Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 11 Apr 2025 17:48:40 -0400 Subject: [PATCH 18/35] allow NAs to behave --- R/checkParents.R | 27 ++++++++++++++++++++++----- R/cleanPedigree.R | 4 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/R/checkParents.R b/R/checkParents.R index b80669bf..7d142af1 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -145,7 +145,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, # Are any parents in both momID and dadID? momdad <- intersect(ped$dadID, ped$momID) - if (!is.na(momdad) && length(momdad) > 0) { + if (length(momdad) > 0&& !is.na(momdad)) { validation_results$parents_in_both <- momdad if (verbose) { cat(paste( @@ -185,21 +185,38 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, - if (!is.na(validation_results$female_var)) { + if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)){ corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]] ped$sex[mom_indices[!is.na(mom_indices)]] <- validation_results$female_var changes$corrected_mom_sex <- corrected_moms if (verbose && length(corrected_moms) > 0) { cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n") } + } else { + corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]] + ped$sex[mom_indices[!is.na(mom_indices)]] <- 0 + + changes$corrected_mom_sex <- corrected_moms + if (verbose && length(corrected_moms) > 0) { + cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n") + } + } - if (!is.na(validation_results$male_var)) { + if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)){ corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]] ped$sex[dad_indices[!is.na(dad_indices)]] <- validation_results$male_var changes$corrected_dad_sex <- corrected_dads if (verbose && length(corrected_dads) > 0) { cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n") } + } else { + corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]] + ped$sex[dad_indices[!is.na(dad_indices)]] <- 1 + changes$corrected_dad_sex <- corrected_dads + if (verbose && length(corrected_dads) > 0) { + cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n") + } + } } } @@ -218,7 +235,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- validation_results$male_var + new_entry$sex <- if(length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1 new_entries <- rbind(new_entries, new_entry) } @@ -231,7 +248,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- validation_results$female_var + new_entry$sex <- if(length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0 new_entries <- rbind(new_entries, new_entry) } diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index b03ca270..737b558c 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -16,9 +16,9 @@ standardizeColnames <- function(df, verbose = FALSE) { "fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", "gen" = "^(?:gen(?:s|eration)?)", - "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*)", + "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", - "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*)", + "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", From c18dbd758587b62d5a6af29422cd57795006ce95 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 11 Apr 2025 17:48:40 -0400 Subject: [PATCH 19/35] allow NAs to behave --- R/checkParents.R | 27 ++++++++++++++++++++++----- R/cleanPedigree.R | 4 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/R/checkParents.R b/R/checkParents.R index b80669bf..7d142af1 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -145,7 +145,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, # Are any parents in both momID and dadID? momdad <- intersect(ped$dadID, ped$momID) - if (!is.na(momdad) && length(momdad) > 0) { + if (length(momdad) > 0&& !is.na(momdad)) { validation_results$parents_in_both <- momdad if (verbose) { cat(paste( @@ -185,21 +185,38 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, - if (!is.na(validation_results$female_var)) { + if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)){ corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]] ped$sex[mom_indices[!is.na(mom_indices)]] <- validation_results$female_var changes$corrected_mom_sex <- corrected_moms if (verbose && length(corrected_moms) > 0) { cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n") } + } else { + corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]] + ped$sex[mom_indices[!is.na(mom_indices)]] <- 0 + + changes$corrected_mom_sex <- corrected_moms + if (verbose && length(corrected_moms) > 0) { + cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n") + } + } - if (!is.na(validation_results$male_var)) { + if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)){ corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]] ped$sex[dad_indices[!is.na(dad_indices)]] <- validation_results$male_var changes$corrected_dad_sex <- corrected_dads if (verbose && length(corrected_dads) > 0) { cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n") } + } else { + corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]] + ped$sex[dad_indices[!is.na(dad_indices)]] <- 1 + changes$corrected_dad_sex <- corrected_dads + if (verbose && length(corrected_dads) > 0) { + cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n") + } + } } } @@ -218,7 +235,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- validation_results$male_var + new_entry$sex <- if(length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1 new_entries <- rbind(new_entries, new_entry) } @@ -231,7 +248,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- validation_results$female_var + new_entry$sex <- if(length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0 new_entries <- rbind(new_entries, new_entry) } diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index b03ca270..737b558c 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -16,9 +16,9 @@ standardizeColnames <- function(df, verbose = FALSE) { "fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", "gen" = "^(?:gen(?:s|eration)?)", - "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*)", + "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", - "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*)", + "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", From 31f9a42777d5f2b72f4fd0b4a9422c9f92153c2f Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 12 Apr 2025 16:21:24 -0400 Subject: [PATCH 20/35] nicer plots --- man/assignCoupleIDs.Rd | 21 +++ vignettes/partial.Rmd | 143 +++++++++++------- vignettes/partial.html | 330 ++++++++++++++++++++++++++++------------- 3 files changed, 341 insertions(+), 153 deletions(-) create mode 100644 man/assignCoupleIDs.Rd diff --git a/man/assignCoupleIDs.Rd b/man/assignCoupleIDs.Rd new file mode 100644 index 00000000..95165581 --- /dev/null +++ b/man/assignCoupleIDs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpPedigree.R +\name{assignCoupleIDs} +\alias{assignCoupleIDs} +\alias{assignCoupleIds} +\title{Assign Couple IDs} +\usage{ +assignCoupleIDs(df_Ngen) + +assignCoupleIds(df_Ngen) +} +\arguments{ +\item{df_Ngen}{The dataframe for the current generation, including columns for individual IDs and spouse IDs.} +} +\value{ +The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier. +} +\description{ +This subfunction assigns a unique couple ID to each mated pair in the generation. +Unmated individuals are assigned NA for their couple ID. +} diff --git a/vignettes/partial.Rmd b/vignettes/partial.Rmd index 45fb7ce6..52a9e45e 100644 --- a/vignettes/partial.Rmd +++ b/vignettes/partial.Rmd @@ -49,11 +49,13 @@ We compute the additive genetic relationship matrix using both the classic and p ped_add_partial_complete <- ped2com(df, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_complete <- ped2com(df, isChild_method = "classic", - component = "additive", adjacency_method = "direct" + component = "additive", adjacency_method = "direct", + sparse = FALSE ) ``` @@ -67,23 +69,29 @@ library(corrplot) corrplot(as.matrix(ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Additive component - Classic method" -) + is.corr = FALSE, title = "Additive component - Classic method", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) + corrplot(as.matrix(ped_add_partial_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Additive component - Partial parent method" -) + is.corr = FALSE, title = "Additive component - Partial parent method", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` To verify this, we subtract one matrix from the other and calculate RMSE. The difference should be numerically zero. Indeed, it is `r sqrt(mean((ped_add_classic_complete-ped_add_partial_complete)^2))`. -```{r} -corrplot(as.matrix(ped_add_classic_complete - ped_add_partial_complete), +```{r,warning=FALSE} +corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE -) + is.corr = FALSE, order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` @@ -101,12 +109,14 @@ df$momID[df$ID == 4] <- NA ped_add_partial_mom <- ped_add_partial <- ped2com(df, isChild_method = "partialparent", component = "additive", - adjacency_method = "direct" + adjacency_method = "direct", + sparse = FALSE ) ped_add_classic_mom <- ped_add_classic <- ped2com(df, isChild_method = "classic", - component = "additive", adjacency_method = "direct" + component = "additive", adjacency_method = "direct", + sparse = FALSE ) ``` @@ -119,30 +129,35 @@ The resulting additive matrices reflect this difference. The RMSE between the tw ```{r} corrplot(as.matrix(ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic (mother removed)" -) - + is.corr = FALSE, title = "Classic (mother removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial (mother removed)" -) + is.corr = FALSE, title = "Partial (mother removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` We quantify the overall matrix difference: ```{r} -sqrt(mean((ped_add_classic - ped_add_partial)^2)) +sqrt(mean((as.matrix(ped_add_classic) - as.matrix(ped_add_partial))^2)) ``` Next, we compare each method to the matrix from the complete pedigree. This evaluates how much each method deviates from the correct additive structure. ```{r} -corrplot(as.matrix(ped_add_classic_complete - ped_add_classic), +corrplot(as.matrix(ped_add_classic_complete) - as.matrix(ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE -) + is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) sqrt(mean((ped_add_classic_complete - ped_add_classic)^2)) ``` @@ -152,8 +167,10 @@ The RMSE between the true additive component and the classic method is `r sqrt(m ```{r} corrplot(as.matrix(ped_add_classic_complete - ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE -) + is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) sqrt(mean((ped_add_classic_complete - ped_add_partial)^2)) ``` @@ -190,18 +207,22 @@ ped_add_classic_dad <- ped_add_classic <- ped2com(df, ``` -As we can see, the two matrices are different. The RMSE between the two matrices is `r sqrt(mean((ped_add_classic-ped_add_partial)^2))`. +As we can see, the two matrices are different. The RMSE between the two matrices is `r sqrt(mean((as.matrix(ped_add_classic)-as.matrix(ped_add_partial))^2))`. ```{r} corrplot(as.matrix(ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic (father removed)" -) + is.corr = FALSE, title = "Classic (father removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial (father removed)" -) + is.corr = FALSE, title = "Partial (father removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` Again, we compare to the true matrix from the complete pedigree: @@ -210,8 +231,10 @@ Again, we compare to the true matrix from the complete pedigree: ```{r} corrplot(as.matrix(ped_add_classic_complete - ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE -) + is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) sqrt(mean((ped_add_classic_complete - ped_add_classic)^2)) ``` @@ -220,8 +243,10 @@ sqrt(mean((ped_add_classic_complete - ped_add_classic)^2)) ```{r} corrplot(as.matrix(ped_add_classic_complete - ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE -) + is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) sqrt(mean((ped_add_classic_complete - ped_add_partial)^2)) ``` @@ -362,28 +387,38 @@ fam1 <- inbreeding_list[[1]] corrplot(as.matrix(fam1$ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic - Complete" -) + is.corr = FALSE, title = "Classic - Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_classic_mom), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic - Mom Missing" -) + is.corr = FALSE, title = "Classic - Mom Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_partial_mom), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial - Mom Missing" -) + is.corr = FALSE, title = "Partial - Mom Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic - Dad Missing" -) + is.corr = FALSE, title = "Classic - Dad Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial - Dad Missing" -) + is.corr = FALSE, title = "Partial - Dad Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` @@ -392,23 +427,31 @@ To visualize the differences from the true matrix: ```{r} corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic Mom Diff from Complete" -) + is.corr = FALSE, title = "Classic Mom Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial Mom Diff from Complete" -) + is.corr = FALSE, title = "Partial Mom Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic Dad Diff from Complete" -) + is.corr = FALSE, title = "Classic Dad Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Partial Dad Diff from Complete" -) + is.corr = FALSE, title = "Partial Dad Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) ``` These plots show how each method responds to missing data, and whether it maintains consistency with the complete pedigree. We observe that the partial parent method typically introduces smaller deviations. If desired, this same diagnostic can be repeated for additional families, such as inbreeding_list[[2]]. diff --git a/vignettes/partial.html b/vignettes/partial.html index aaa1fbec..9a2e5535 100644 --- a/vignettes/partial.html +++ b/vignettes/partial.html @@ -377,12 +377,14 @@

Hazard Data Example

ped_add_partial_complete <- ped2com(df,
   isChild_method = "partialparent",
   component = "additive",
-  adjacency_method = "direct"
-)
-ped_add_classic_complete <- ped2com(df,
-  isChild_method = "classic",
-  component = "additive", adjacency_method = "direct"
-)
+ adjacency_method = "direct", + sparse = FALSE +) +ped_add_classic_complete <- ped2com(df, + isChild_method = "classic", + component = "additive", adjacency_method = "direct", + sparse = FALSE +)

The following plots display the full additive matrices. These matrices should be identical.

This can be confirmed visually and numerically.

@@ -392,25 +394,28 @@

Hazard Data Example

corrplot(as.matrix(ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Additive component - Classic method" -) -

+ is.corr = FALSE, title = "Additive component - Classic method", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


-corrplot(as.matrix(ped_add_partial_complete),
-  method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Additive component - Partial parent method"
-)
-

+ +corrplot(as.matrix(ped_add_partial_complete), + method = "color", type = "lower", col.lim = c(0, 1), + is.corr = FALSE, title = "Additive component - Partial parent method", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +

To verify this, we subtract one matrix from the other and calculate RMSE. The difference should be numerically zero. Indeed, it is 0.

-
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial_complete),
+
corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE
-)
-#> Warning in corrplot(as.matrix(ped_add_classic_complete -
-#> ped_add_partial_complete), : col.lim interval too wide, please set a suitable
-#> value
-

+ is.corr = FALSE, order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0))
+

Introducing Missingness: Remove a Parent

@@ -420,13 +425,15 @@

Introducing Missingness: Remove a Parent

ped_add_partial_mom <- ped_add_partial <- ped2com(df,
   isChild_method = "partialparent",
   component = "additive",
-  adjacency_method = "direct"
-)
-
-ped_add_classic_mom <- ped_add_classic <- ped2com(df,
-  isChild_method = "classic",
-  component = "additive", adjacency_method = "direct"
-)
+ adjacency_method = "direct", + sparse = FALSE +) + +ped_add_classic_mom <- ped_add_classic <- ped2com(df, + isChild_method = "classic", + component = "additive", adjacency_method = "direct", + sparse = FALSE +)

The two methods now treat individual 4 differently in the parent adjacency matrix. The classic method applies a fixed contribution because one parent remains. The partial parent method inflates the @@ -436,26 +443,31 @@

Introducing Missingness: Remove a Parent

between the two matrices is 0.009811.

corrplot(as.matrix(ped_add_classic),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic (mother removed)"
-)
-

-

-corrplot(as.matrix(ped_add_partial),
-  method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial (mother removed)"
-)
-

+ is.corr = FALSE, title = "Classic (mother removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +

+
corrplot(as.matrix(ped_add_partial),
+  method = "color", type = "lower", col.lim = c(0, 1),
+  is.corr = FALSE, title = "Partial (mother removed)",
+  order = "hclust",
+  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
+  col = COL1('Reds', 100), mar=c(0,0,2,0))
+

We quantify the overall matrix difference:

-
sqrt(mean((ped_add_classic - ped_add_partial)^2))
+
sqrt(mean((as.matrix(ped_add_classic) - as.matrix(ped_add_partial))^2))
 #> [1] 0.009811047

Next, we compare each method to the matrix from the complete pedigree. This evaluates how much each method deviates from the correct additive structure.

-
corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
+
corrplot(as.matrix(ped_add_classic_complete) - as.matrix(ped_add_classic),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE
-)
-

+ is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0))
+


 sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
 #> [1] 0.02991371
@@ -463,9 +475,11 @@

Introducing Missingness: Remove a Parent

is 0.0299137.

corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE
-)
-

+ is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0))
+


 sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
 #> [1] 0.02825904
@@ -499,32 +513,44 @@

Removing the Father Instead

two matrices is 0.009811.

corrplot(as.matrix(ped_add_classic_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic (father removed)"
-)
-

+ is.corr = FALSE, title = "Classic (father removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial (father removed)"
-)
-

+ is.corr = FALSE, title = "Partial (father removed)", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +

Again, we compare to the true matrix from the complete pedigree:

corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE
-)
-

+ is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
-#> [1] 0.02991371
+#> Warning in mean.default((ped_add_classic_complete - ped_add_classic)^2): +#> argument is not numeric or logical: returning NA +#> [1] NA
corrplot(as.matrix(ped_add_classic_complete - ped_add_partial),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE
-)
-

+ is.corr = FALSE, + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
-#> [1] 0.02825904
+#> Warning in mean.default((ped_add_classic_complete - ped_add_partial)^2): +#> argument is not numeric or logical: returning NA +#> [1] NA

The partial parent method again yields a matrix closer to the full-data version.

@@ -638,7 +664,85 @@

Inbreeding Dataset: Family-Level Evaluation

ped_add_partial_mom = ped_add_partial_mom, ped_add_classic_mom = ped_add_classic_mom ) -} +} +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_dad)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_partial_mom)^2): +#> argument is not numeric or logical: returning NA +#> Warning in mean.default((ped_add_classic_complete - ped_add_classic_mom)^2): +#> argument is not numeric or logical: returning NA

Example: Family 1

To understand what these matrices look like, we visualize them for @@ -651,57 +755,75 @@

Example: Family 1

corrplot(as.matrix(fam1$ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Classic - Complete" -)
-

+ is.corr = FALSE, title = "Classic - Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_classic_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic - Mom Missing"
-)
-

+ is.corr = FALSE, title = "Classic - Mom Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_partial_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial - Mom Missing"
-)
-

+ is.corr = FALSE, title = "Partial - Mom Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_classic_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic - Dad Missing"
-)
-

+ is.corr = FALSE, title = "Classic - Dad Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial - Dad Missing"
-)
-

+ is.corr = FALSE, title = "Partial - Dad Missing", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +

To visualize the differences from the true matrix:

corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic Mom Diff from Complete"
-)
-

+ is.corr = FALSE, title = "Classic Mom Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial Mom Diff from Complete"
-)
-

+ is.corr = FALSE, title = "Partial Mom Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Classic Dad Diff from Complete"
-)
-

+ is.corr = FALSE, title = "Classic Dad Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Partial Dad Diff from Complete"
-)
-

+ is.corr = FALSE, title = "Partial Dad Diff from Complete", + order = "hclust", + tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1('Reds', 100), mar=c(0,0,2,0)) +

These plots show how each method responds to missing data, and whether it maintains consistency with the complete pedigree. We observe that the partial parent method typically introduces smaller deviations. @@ -728,13 +850,14 @@

Summary

results %>%
   select(RMSE_diff_mom, RMSE_diff_dad) %>%
   summary()
-#>  RMSE_diff_mom      RMSE_diff_dad     
-#>  Min.   :0.001222   Min.   :0.001222  
-#>  1st Qu.:0.001869   1st Qu.:0.002036  
-#>  Median :0.002538   Median :0.002520  
-#>  Mean   :0.005763   Mean   :0.005786  
-#>  3rd Qu.:0.005625   3rd Qu.:0.005625  
-#>  Max.   :0.024221   Max.   :0.024221
+#> RMSE_diff_mom RMSE_diff_dad +#> Min. : NA Min. : NA +#> 1st Qu.: NA 1st Qu.: NA +#> Median : NA Median : NA +#> Mean :NaN Mean :NaN +#> 3rd Qu.: NA 3rd Qu.: NA +#> Max. : NA Max. : NA +#> NA's :8 NA's :8

In all families, both RMSE_diff_mom and RMSE_diff_dad are positive—indicating that the classic method produces larger the errors relative to the partial method. This @@ -742,9 +865,9 @@

Summary

father.

To verify this directly:

mean(results$RMSE_diff_mom > 0, na.rm = TRUE)
-#> [1] 1
+#> [1] NaN
 mean(results$RMSE_diff_dad > 0, na.rm = TRUE)
-#> [1] 1
+#> [1] NaN

These proportions show how often the partial method produces a lower RMSE across the dataset. This confirms the earlier findings: when pedigree data are incomplete, the partial parent method more faithfully @@ -756,13 +879,14 @@

Summary

-max_R_partial_dad, -max_R_classic_mom, -max_R_partial_mom, -max_R_classic ) %>% summary() -#> RMSE_partial_dad RMSE_partial_mom RMSE_classic_dad RMSE_classic_mom -#> Min. :0.04773 Min. :0.04773 Min. :0.04895 Min. :0.04895 -#> 1st Qu.:0.05570 1st Qu.:0.05349 1st Qu.:0.05774 1st Qu.:0.05555 -#> Median :0.06206 Median :0.06899 Median :0.06457 Median :0.07158 -#> Mean :0.07545 Mean :0.07686 Mean :0.08124 Mean :0.08262 -#> 3rd Qu.:0.08237 3rd Qu.:0.08323 3rd Qu.:0.08866 3rd Qu.:0.08866 -#> Max. :0.15547 Max. :0.15547 Max. :0.17969 Max. :0.17969 +#> RMSE_partial_dad RMSE_partial_mom RMSE_classic_dad RMSE_classic_mom +#> Min. : NA Min. : NA Min. : NA Min. : NA +#> 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA +#> Median : NA Median : NA Median : NA Median : NA +#> Mean :NaN Mean :NaN Mean :NaN Mean :NaN +#> 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA +#> Max. : NA Max. : NA Max. : NA Max. : NA +#> NA's :8 NA's :8 NA's :8 NA's :8 From c7320ebcf519d0b870d3a3fbbee3013f6bb07956 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sun, 13 Apr 2025 15:05:39 -0400 Subject: [PATCH 21/35] more tests --- R/readGedcom.R | 7 +++++ R/summarizePedigree.R | 22 +++++++++++++-- tests/testthat/test-summarizePedigrees.R | 34 ++++++++++++++++++++++-- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/R/readGedcom.R b/R/readGedcom.R index f9066c91..c0eea803 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -587,3 +587,10 @@ collapseNames <- function(verbose, df_temp) { } return(df_temp) } +#' @rdname readGedcom +#' @export +readGed <- readGedcom + +#' @rdname readGedcom +#' @export +readgedcom <- readGedcom diff --git a/R/summarizePedigree.R b/R/summarizePedigree.R index ea8f81e8..6cee4e5e 100644 --- a/R/summarizePedigree.R +++ b/R/summarizePedigree.R @@ -235,12 +235,11 @@ summarizePedigrees <- function(ped, famID = "famID", personID = "ID", ) } } - return(output) } -# Function to calculate summary statistics for all numeric variables +#' Function to calculate summary statistics for all numeric variables #' This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function. #' @inheritParams summarizePedigrees #' @param data A data.table containing the pedigree data. @@ -352,6 +351,9 @@ summarizeMatrilines <- function(ped, famID = "famID", personID = "ID", ) } + + + #' Summarize the paternal lines in a pedigree #' @inheritParams summarizePedigrees #' @seealso [summarizePedigrees ()] @@ -432,3 +434,19 @@ findBiggest <- function(foo_summary_dt, nbiggest, n_foo) { )]) return(biggest_foo) } + +#' @rdname summarizePedigrees +#' @export +summarisePedigrees <- summarizePedigrees + +#' @rdname summarizeFamilies +#' @export +summariseFamilies <- summarizeFamilies + +#' @rdname summarizeMatrilines +#' @export +summariseMatrilines <- summarizeMatrilines + +#' @rdname summarizePatrilines +#' @export +summarisePatrilines <- summarizePatrilines diff --git a/tests/testthat/test-summarizePedigrees.R b/tests/testthat/test-summarizePedigrees.R index 5bc1d6ea..3fd62464 100644 --- a/tests/testthat/test-summarizePedigrees.R +++ b/tests/testthat/test-summarizePedigrees.R @@ -7,7 +7,13 @@ test_that("Counts the correct number people", { expect_equal(result_observed, result_expected) }) - +# Test: SummarizeFamilies is used when SummariseFamilies +test_that("SummarizeFamilies works like SummariseFamilies", { + df <- ped2fam(potter, famID = "newFamID", personID = "personID") + df_summarized <- summarizeFamilies(df, famID = "newFamID", personID = "personID") + df_summarised <- summariseFamilies(df, famID = "newFamID", personID = "personID") + expect_equal(df_summarised, df_summarized) +}) # Test Case 2: Multiple families test_that("summarizeFamilies() works with multiple families", { df <- ped2fam(inbreeding, famID = "newFamID", personID = "ID") @@ -72,7 +78,13 @@ test_that("summarizeMatrilines() works", { result_observed <- nrow(df_summarized$biggest_maternal) expect_equal(result_observed, nbiggest) }) - +# Test: SummarizeMatrilines is used when SummariseMatrilines +test_that("SummarizeMatrilines works like SummariseMatrilines", { + df <- ped2fam(potter, famID = "newFamID", personID = "personID") + df_summarized <- summarizeMatrilines(df, famID = "newFamID", personID = "personID") + df_summarised <- summariseMatrilines(df, famID = "newFamID", personID = "personID") + expect_equal(df_summarised, df_summarized) +}) # Test Case 5: Does this function work for summarizePatrilines test_that("summarizePatrilines() works", { nbiggest <- 4 @@ -98,6 +110,13 @@ test_that("summarizePatrilines() works", { expect_equal(result_observed, nbiggest) }) +# Test: summarizePatrilines is used when SummarisePatrilines +test_that("summarizePatrilines works like SummarisePatrilines", { + df <- ped2fam(potter, famID = "newFamID", personID = "personID") + df_summarized <- summarizePatrilines(df, famID = "newFamID", personID = "personID") + df_summarised <- summarisePatrilines(df, famID = "newFamID", personID = "personID") + expect_equal(df_summarised, df_summarized) +}) # Test Case 6: Handling of missing values in critical columns test_that("summarizePedigrees() handles missing values correctly", { df <- data.frame( @@ -137,6 +156,9 @@ test_that("summarizePedigrees() throws error on invalid column names", { expect_error(summarizePedigrees(df, byr = "unknown_column")) }) + + + # Test Case 9: Handling empty dataset # test_that("summarizePedigrees() handles empty dataset gracefully", { # df <- data.frame(ID = integer(), momID = integer(), dadID = integer(), famID = integer()) @@ -155,3 +177,11 @@ test_that("summarizePedigrees() works for single-entry pedigree", { expect_equal(nrow(df_summarized$family_summary), 1) expect_equal(df_summarized$oldest_families$byr_mean, 1920) }) + +# Test: summarizePedigrees is used when SummarisePedigrees +test_that("SummarizePedigrees works like SummarisePedigrees", { + df <- ped2fam(potter, famID = "newFamID", personID = "personID") + df_summarized <- summarizePedigrees(df, famID = "newFamID", personID = "personID") + df_summarised <- summarisePedigrees(df, famID = "newFamID", personID = "personID") + expect_equal(df_summarised, df_summarized) +}) From f45712fa7d116e6e37e60290d46a375d09ba9b7f Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sun, 13 Apr 2025 15:07:10 -0400 Subject: [PATCH 22/35] aliases --- NAMESPACE | 6 ++++++ man/calculateSummaryDT.Rd | 4 +++- man/readGedcom.Rd | 24 ++++++++++++++++++++++++ man/summarizeFamilies.Rd | 19 +++++++++++++++++++ man/summarizeMatrilines.Rd | 19 +++++++++++++++++++ man/summarizePatrilines.Rd | 19 +++++++++++++++++++ man/summarizePedigrees.Rd | 21 +++++++++++++++++++++ 7 files changed, 111 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 972282d4..1f9bacee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,8 +33,10 @@ export(ped2maternal) export(ped2mit) export(ped2paternal) export(plotPedigree) +export(readGed) export(readGedcom) export(readWikifamilytree) +export(readgedcom) export(recodeSex) export(related_coef) export(relatedness) @@ -42,6 +44,10 @@ export(repairSex) export(resample) export(simulatePedigree) export(sizeAllGens) +export(summariseFamilies) +export(summariseMatrilines) +export(summarisePatrilines) +export(summarisePedigrees) export(summarizeFamilies) export(summarizeMatrilines) export(summarizePatrilines) diff --git a/man/calculateSummaryDT.Rd b/man/calculateSummaryDT.Rd index 45e67d3f..cb403b04 100644 --- a/man/calculateSummaryDT.Rd +++ b/man/calculateSummaryDT.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/summarizePedigree.R \name{calculateSummaryDT} \alias{calculateSummaryDT} -\title{This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.} +\title{Function to calculate summary statistics for all numeric variables +This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function.} \usage{ calculateSummaryDT(data, group_var, skip_var, five_num_summary = FALSE) } @@ -20,6 +21,7 @@ the minimum, median, and maximum values.} A data.table containing the summary statistics for all numeric variables. } \description{ +Function to calculate summary statistics for all numeric variables This function calculates summary statistics for all numeric variables in a data.table. It is supposed to be used internally by the \code{summarize_pedigree} function. } \keyword{internal} diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index fdb158e1..7bab49b1 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/readGedcom.R \name{readGedcom} \alias{readGedcom} +\alias{readGed} +\alias{readgedcom} \title{Read a GEDCOM File} \usage{ readGedcom( @@ -14,6 +16,28 @@ readGedcom( update_rate = 1000, ... ) + +readGed( + file_path, + verbose = FALSE, + add_parents = TRUE, + remove_empty_cols = TRUE, + combine_cols = TRUE, + skinny = FALSE, + update_rate = 1000, + ... +) + +readgedcom( + file_path, + verbose = FALSE, + add_parents = TRUE, + remove_empty_cols = TRUE, + combine_cols = TRUE, + skinny = FALSE, + update_rate = 1000, + ... +) } \arguments{ \item{file_path}{The path to the GEDCOM file.} diff --git a/man/summarizeFamilies.Rd b/man/summarizeFamilies.Rd index 8903eb93..2cb50761 100644 --- a/man/summarizeFamilies.Rd +++ b/man/summarizeFamilies.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/summarizePedigree.R \name{summarizeFamilies} \alias{summarizeFamilies} +\alias{summariseFamilies} \title{Summarize the families in a pedigree} \usage{ summarizeFamilies( @@ -21,6 +22,24 @@ summarizeFamilies( five_num_summary = FALSE, verbose = FALSE ) + +summariseFamilies( + ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + matID = "matID", + patID = "patID", + byr = NULL, + founder_sort_var = NULL, + include_founder = FALSE, + nbiggest = 5, + noldest = 5, + skip_var = NULL, + five_num_summary = FALSE, + verbose = FALSE +) } \arguments{ \item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns} diff --git a/man/summarizeMatrilines.Rd b/man/summarizeMatrilines.Rd index 2890b622..577204f6 100644 --- a/man/summarizeMatrilines.Rd +++ b/man/summarizeMatrilines.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/summarizePedigree.R \name{summarizeMatrilines} \alias{summarizeMatrilines} +\alias{summariseMatrilines} \title{Summarize the maternal lines in a pedigree} \usage{ summarizeMatrilines( @@ -21,6 +22,24 @@ summarizeMatrilines( five_num_summary = FALSE, verbose = FALSE ) + +summariseMatrilines( + ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + matID = "matID", + patID = "patID", + byr = NULL, + include_founder = FALSE, + founder_sort_var = NULL, + nbiggest = 5, + noldest = 5, + skip_var = NULL, + five_num_summary = FALSE, + verbose = FALSE +) } \arguments{ \item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns} diff --git a/man/summarizePatrilines.Rd b/man/summarizePatrilines.Rd index aed89bcd..27fd9494 100644 --- a/man/summarizePatrilines.Rd +++ b/man/summarizePatrilines.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/summarizePedigree.R \name{summarizePatrilines} \alias{summarizePatrilines} +\alias{summarisePatrilines} \title{Summarize the paternal lines in a pedigree} \usage{ summarizePatrilines( @@ -21,6 +22,24 @@ summarizePatrilines( five_num_summary = FALSE, verbose = FALSE ) + +summarisePatrilines( + ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + matID = "matID", + patID = "patID", + byr = NULL, + founder_sort_var = NULL, + include_founder = FALSE, + nbiggest = 5, + noldest = 5, + skip_var = NULL, + five_num_summary = FALSE, + verbose = FALSE +) } \arguments{ \item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns} diff --git a/man/summarizePedigrees.Rd b/man/summarizePedigrees.Rd index 3ed2f0a5..a4f6a6e9 100644 --- a/man/summarizePedigrees.Rd +++ b/man/summarizePedigrees.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/summarizePedigree.R \name{summarizePedigrees} \alias{summarizePedigrees} +\alias{summarisePedigrees} \title{Summarize Pedigree Data} \usage{ summarizePedigrees( @@ -23,6 +24,26 @@ summarizePedigrees( network_checks = FALSE, verbose = FALSE ) + +summarisePedigrees( + ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + matID = "matID", + patID = "patID", + type = c("fathers", "mothers", "families"), + byr = NULL, + include_founder = FALSE, + founder_sort_var = NULL, + nbiggest = 5, + noldest = nbiggest, + skip_var = NULL, + five_num_summary = FALSE, + network_checks = FALSE, + verbose = FALSE +) } \arguments{ \item{ped}{a pedigree dataset. Needs ID, momID, and dadID columns} From d023b785d81f99e969e5e94176c6cf2403605a94 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 14:07:33 -0400 Subject: [PATCH 23/35] default now is direct method --- R/convertPedigree.R | 16 +++++++++++++--- tests/testthat/test-convertPedigree.R | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/R/convertPedigree.R b/R/convertPedigree.R index 9cab93f0..3d5ac7ca 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -31,7 +31,7 @@ ped2com <- function(ped, component, flatten.diag = FALSE, standardize.colnames = TRUE, transpose_method = "tcrossprod", - adjacency_method = "indexed", + adjacency_method = "direct", isChild_method = "classic", saveable = FALSE, resume = FALSE, @@ -615,8 +615,18 @@ ped2ce <- function(ped, iss <- c(mIDs$rID, dIDs$rID) jss <- c(mIDs$cID, dIDs$cID) } else if (component %in% c("common nuclear")) { - stop("Common Nuclear component is not yet implemented for direct method. Use index method.\n") + message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") # change to warning and call indexed version + list_of_adjacency <- .adjIndexed(ped = ped, component = component, + saveable = saveable, resume = resume, + save_path = save_path, verbose = verbose, + lastComputed = lastComputed, nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, parList = parList, + lens = lens, save_rate_parlist = save_rate_parlist, + ... + ) + return(list_of_adjacency) } else if (component %in% c("mitochondrial")) { mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) iss <- c(mIDs$rID) @@ -641,7 +651,7 @@ ped2ce <- function(ped, #' @param checkpoint_files a list of checkpoint files compute_parent_adjacency <- function(ped, component, - adjacency_method = "indexed", + adjacency_method = "direct", saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R index 11dfbeca..ec5af8b1 100644 --- a/tests/testthat/test-convertPedigree.R +++ b/tests/testthat/test-convertPedigree.R @@ -301,10 +301,10 @@ test_that("adjacency_method 'indexed', 'loop', and direct produce the same resu # common nuclear ped_common_indexed <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed") ped_common_loop <- ped2com(hazard, component = "common nuclear", adjacency_method = "loop") - # ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct") + ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct") expect_equal(ped_common_indexed, ped_common_loop, tolerance = tolerance) - # expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance) - # expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance) }) From b55ae50c22a2272ee71b2cfaaa8d80e0b5c6c579 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 17:13:48 -0400 Subject: [PATCH 24/35] Update readGedcom.R subfactor --- NAMESPACE | 1 + R/helpPedigree.R | 11 +- R/readGedcom.R | 69 ++- R/readGedcom_alpha.R | 616 ++++++++++++++++++++++ man/compute_parent_adjacency.Rd | 2 +- man/determineSex.Rd | 2 +- man/ped2com.Rd | 2 +- man/postProcessGedcom.Rd | 34 ++ man/readGedcom.Rd | 3 + tests/testthat/test-readPedigrees_alpha.R | 231 ++++++++ 10 files changed, 946 insertions(+), 25 deletions(-) create mode 100644 R/readGedcom_alpha.R create mode 100644 man/postProcessGedcom.Rd create mode 100644 tests/testthat/test-readPedigrees_alpha.R diff --git a/NAMESPACE b/NAMESPACE index 1f9bacee..69e23307 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(SimPed) export(allGens) +export(assignCoupleIDs) export(calcAllGens) export(calcFamilySize) export(calculateRelatedness) diff --git a/R/helpPedigree.R b/R/helpPedigree.R index 3d054d4b..f9e7c855 100644 --- a/R/helpPedigree.R +++ b/R/helpPedigree.R @@ -41,13 +41,13 @@ createGenDataFrame <- function(sizeGens, genIndex, idGen) { #' @param sexR Numeric value indicating the sex ratio (proportion of males). #' @return Vector of sexes ("M" for male, "F" for female) for the offspring. #' @importFrom stats runif -determineSex <- function(idGen, sexR) { +determineSex <- function(idGen, sexR, code_male = "M", code_female = "F") { if (runif(1) > .5) { - sexVec1 <- rep("M", floor(length(idGen) * sexR)) - sexVec2 <- rep("F", length(idGen) - length(sexVec1)) + sexVec1 <- rep(code_male, floor(length(idGen) * sexR)) + sexVec2 <- rep(code_female, length(idGen) - length(sexVec1)) } else { - sexVec1 <- rep("F", floor(length(idGen) * (1 - sexR))) - sexVec2 <- rep("M", length(idGen) - length(sexVec1)) + sexVec1 <- rep(code_female, floor(length(idGen) * (1 - sexR))) + sexVec2 <- rep(code_male, length(idGen) - length(sexVec1)) } sexVec <- sample(c(sexVec1, sexVec2)) return(sexVec) @@ -60,6 +60,7 @@ determineSex <- function(idGen, sexR) { #' #' @param df_Ngen The dataframe for the current generation, including columns for individual IDs and spouse IDs. #' @return The input dataframe augmented with a 'coupleId' column, where each mated pair has a unique identifier. +#' @export assignCoupleIDs <- function(df_Ngen) { df_Ngen$coupleId <- NA_character_ # Initialize the coupleId column with NAs usedCoupleIds <- character() # Initialize an empty character vector to track used IDs diff --git a/R/readGedcom.R b/R/readGedcom.R index c0eea803..f271632c 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -55,6 +55,7 @@ readGedcom <- function(file_path, combine_cols = TRUE, skinny = FALSE, update_rate = 1000, + post_process = TRUE, ...) { # Checks if (!file.exists(file_path)) stop("File does not exist: ", file_path) @@ -300,6 +301,39 @@ readGedcom <- function(file_path, if (nrow(df_temp) != num_rows$num_indi_rows) { warning("The number of people found in the processed file does not match the number of individuals raw data") } + + if(post_process){ + if (verbose) { + print("Post-processing data frame") + } + # Remove the first row (empty) +df_temp <- postProcessGedcom( + df_temp = df_temp, + remove_empty_cols = remove_empty_cols, + combine_cols = combine_cols, + add_parents = add_parents, + skinny = skinny, + verbose = verbose + ) + + } + + return(df_temp) +} + +#' Post-process GEDCOM Data Frame +#' +#' @inheritParams readGedcom +#' @inheritParams mapFAMS2parents +#' @return A data frame with processed information. + +postProcessGedcom <- function(df_temp, + remove_empty_cols = TRUE, + combine_cols = TRUE, + add_parents = TRUE, + skinny = TRUE, + verbose = FALSE +){ # Add mom and dad ids if (add_parents) { if (verbose) { @@ -308,28 +342,29 @@ readGedcom <- function(file_path, df_temp <- processParents(df_temp, datasource = "gedcom") } - if (combine_cols) { - df_temp <- collapseNames(verbose = verbose, df_temp = df_temp) - } +if (combine_cols) { + df_temp <- collapseNames(verbose = verbose, df_temp = df_temp) +} - if (remove_empty_cols) { - # Remove empty columns - if (verbose) { - print("Removing empty columns") - } - df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] +if (remove_empty_cols) { + # Remove empty columns + if (verbose) { + print("Removing empty columns") } - if (skinny) { - if (verbose) { - print("Slimming down the data frame") - } - df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] - df_temp$FAMC <- NULL - df_temp$FAMS <- NULL + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] +} +if (skinny) { + if (verbose) { + print("Slimming down the data frame") } - return(df_temp) + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] + df_temp$FAMC <- NULL + df_temp$FAMS <- NULL } +return(df_temp) + +} #' Create a mapping of family IDs to parent IDs #' diff --git a/R/readGedcom_alpha.R b/R/readGedcom_alpha.R new file mode 100644 index 00000000..3cde78dc --- /dev/null +++ b/R/readGedcom_alpha.R @@ -0,0 +1,616 @@ +#' Read a GEDCOM File +#' +#' This function reads a GEDCOM file and parses it into a structured data frame of individuals. +#' +#' @param file_path The path to the GEDCOM file. +#' @param add_parents A logical value indicating whether to add parents to the data frame. +#' @param remove_empty_cols A logical value indicating whether to remove columns with all missing values. +#' @param combine_cols A logical value indicating whether to combine columns with duplicate values. +#' @param verbose A logical value indicating whether to print messages. +#' @param skinny A logical value indicating whether to return a skinny data frame. +#' @param update_rate numeric. The rate at which to print progress +#' @param ... Additional arguments to be passed to the function. +#' @return A data frame containing information about individuals, with the following potential columns: +#' - `id`: ID of the individual +#' - `momID`: ID of the individual's mother +#' - `dadID`: ID of the individual's father +#' - `sex`: Sex of the individual +#' - `name`: Full name of the individual +#' - `name_given`: First name of the individual +#' - `name_surn`: Last name of the individual +#' - `name_marriedsurn`: Married name of the individual +#' - `name_nick`: Nickname of the individual +#' - `name_npfx`: Name prefix +#' - `name_nsfx`: Name suffix +#' - `birth_date`: Birth date of the individual +#' - `birth_lat`: Latitude of the birthplace +#' - `birth_long`: Longitude of the birthplace +#' - `birth_place`: Birthplace of the individual +#' - `death_caus`: Cause of death +#' - `death_date`: Death date of the individual +#' - `death_lat`: Latitude of the place of death +#' - `death_long`: Longitude of the place of death +#' - `death_place`: Place of death of the individual +#' - `attribute_caste`: Caste of the individual +#' - `attribute_children`: Number of children of the individual +#' - `attribute_description`: Description of the individual +#' - `attribute_education`: Education of the individual +#' - `attribute_idnumber`: Identification number of the individual +#' - `attribute_marriages`: Number of marriages of the individual +#' - `attribute_nationality`: Nationality of the individual +#' - `attribute_occupation`: Occupation of the individual +#' - `attribute_property`: Property owned by the individual +#' - `attribute_religion`: Religion of the individual +#' - `attribute_residence`: Residence of the individual +#' - `attribute_ssn`: Social security number of the individual +#' - `attribute_title`: Title of the individual +#' - `FAMC`: ID(s) of the family where the individual is a child +#' - `FAMS`: ID(s) of the family where the individual is a spouse +#' @export +readGedcom.alpha <- function(file_path, + verbose = FALSE, + add_parents = TRUE, + remove_empty_cols = TRUE, + combine_cols = TRUE, + skinny = FALSE, + update_rate = 1000, + post_process = TRUE, + ...) { + + # Ensure the file exists and read all lines. + if (!file.exists(file_path)) { + stop("File does not exist: ", file_path) + } + if (verbose) message("Reading file: ", file_path) + lines <- readLines(file_path) + total_lines <- length(lines) + if (verbose) message("File is ", total_lines, " lines long") + + # Count pattern occurrences (pattern_rows remains used in subfunctions) + pattern_rows <- countPatternRows.alpha(data.frame(X1 = lines)) + + # List of variables to initialize + all_var_names <- unlist(list( + identifiers = c("id", "momID", "dadID"), + names = c("name", "name_given", "name_given_pieces", "name_surn", "name_surn_pieces", "name_marriedsurn", + "name_nick", "name_npfx", "name_nsfx"), + sex = c("sex"), + birth = c("birth_date", "birth_lat", "birth_long", "birth_place"), + death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"), + attributes = c("attribute_caste", "attribute_children", "attribute_description", "attribute_education", + "attribute_idnumber", "attribute_marriages", "attribute_nationality", "attribute_occupation", + "attribute_property", "attribute_religion", "attribute_residence", "attribute_ssn", + "attribute_title"), + relationships = c("FAMC", "FAMS") + ), use.names = FALSE) + + # Split the file into blocks; each block corresponds to one individual. + blocks <- splitIndividuals.alpha(lines, verbose) + + # Parse each individual block into a record (a named list) + records <- lapply(blocks, parseIndividualBlock.alpha, + pattern_rows = pattern_rows, + all_var_names = all_var_names, verbose = verbose) + + # Remove any NULLs (if a block did not contain an individual id) + records <- Filter(Negate(is.null), records) + + if (length(records) == 0) { + warning("No people found in file") + return(NULL) + } + + # Convert the list of records to a data frame. + df_temp <- do.call(rbind, lapply(records, function(rec) { + as.data.frame(rec, stringsAsFactors = FALSE) + })) + + if (verbose) message("File has ", nrow(df_temp), " people") + + # Run post-processing if requested. + if (post_process) { + if (verbose) message("Post-processing data frame") + df_temp <- postProcessGedcom.alpha( + df_temp = df_temp, + remove_empty_cols = remove_empty_cols, + combine_cols = combine_cols, + add_parents = add_parents, + skinny = skinny, + verbose = verbose + ) + } + + return(df_temp) +} + +# --- SUBFUNCTIONS --- +#' Split GEDCOM Lines into Individual Blocks +#' +#' This function partitions the GEDCOM file (as a vector of lines) into a list of blocks, +#' where each block corresponds to a single individual starting with an "@ INDI" line. +#' +#' @param lines A character vector of lines from the GEDCOM file. +#' @param verbose Logical indicating whether to output progress messages. +#' @return A list of character vectors, each representing one individual. +splitIndividuals.alpha <- function(lines, verbose = FALSE) { + indi_idx <- grep("@ INDI", lines) + if (length(indi_idx) == 0) return(list()) + + blocks <- list() + for (i in seq_along(indi_idx)) { + start <- indi_idx[i] + end <- if (i < length(indi_idx)) indi_idx[i + 1] - 1 else length(lines) + block <- lines[start:end] + blocks[[length(blocks) + 1]] <- block + } + if (verbose) message("Found ", length(blocks), " individual blocks") + return(blocks) +} + +#' Initialize an Empty Individual Record +#' +#' Creates a named list with all GEDCOM fields set to NA. +#' +#' @param all_var_names A character vector of variable names. +#' @return A named list representing an empty individual record. +initializeRecord.alpha <- function(all_var_names) { + setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) +} + +#' Parse a GEDCOM Individual Block +#' +#' Processes a block of GEDCOM lines corresponding to a single individual. +#' +#' @param block A character vector containing the GEDCOM lines for one individual. +#' @param pattern_rows A list with counts of lines matching specific GEDCOM tags. +#' @param all_var_names A character vector of variable names. +#' @param verbose Logical indicating whether to print progress messages. +#' @return A named list representing the parsed record for the individual, or NULL if no ID is found. +#' @keywords internal +parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbose = FALSE) { + record <- initializeRecord.alpha(all_var_names) + n_lines <- length(block) + + # Loop through the block by index so that we can look ahead for event details. + i <- 1 + while (i <= n_lines) { + line <- block[i] + + # Process individual identifier (e.g., "@ INDI ...") + if (grepl("@ INDI", line)) { + record$id <- stringr::str_extract(line, "(?<=@.)\\d*(?=@)") + i <- i + 1 + next + } + + # Special processing for full name using " NAME" tag. + if (grepl(" NAME", line) && pattern_rows$num_name_rows > 0) { + record <- parseNameLine.alpha(line, record) + i <- i + 1 + next + } + + # Process birth and death events by consuming multiple lines. + if (grepl(" BIRT", line) && pattern_rows$num_birt_rows > 0) { + record <- processEventLine.alpha("birth", block, i, record, pattern_rows) + i <- i + 1 # Skip further processing of this line. + next + } + if (grepl(" DEAT", line) && pattern_rows$num_deat_rows > 0) { + record <- processEventLine.alpha("death", block, i, record, pattern_rows) + i <- i + 1 + next + } + + # Process other tags using common mappings. + # Define mappings for name pieces (if not handled by NAME tag). + name_piece_mappings <- list( + list(tag = "GIVN", field = "name_given_pieces", mode = "replace"), + list(tag = "NPFX", field = "name_npfx", mode = "replace"), + list(tag = "NICK", field = "name_nick", mode = "replace"), + list(tag = "SURN", field = "name_surn_pieces", mode = "replace"), + list(tag = "NSFX", field = "name_nsfx", mode = "replace"), + list(tag = "_MARNM", field = "name_marriedsurn", mode = "replace") + ) + out <- applyTagMappings.alpha(line, record, pattern_rows, name_piece_mappings) + if (out$matched) { record <- out$record + i <- i + 1 + next } + + # Process attribute tags. + attribute_mappings <- list( + list(tag = "SEX", field = "sex", mode = "replace"), + list(tag = "CAST", field = "attribute_caste", mode = "replace"), + list(tag = "DSCR", field = "attribute_description", mode = "replace"), + list(tag = "EDUC", field = "attribute_education", mode = "replace"), + list(tag = "IDNO", field = "attribute_idnumber", mode = "replace"), + list(tag = "NATI", field = "attribute_nationality", mode = "replace"), + list(tag = "NCHI", field = "attribute_children", mode = "replace"), + list(tag = "NMR", field = "attribute_marriages", mode = "replace"), + list(tag = "OCCU", field = "attribute_occupation", mode = "replace"), + list(tag = "PROP", field = "attribute_property", mode = "replace"), + list(tag = "RELI", field = "attribute_religion", mode = "replace"), + list(tag = "RESI", field = "attribute_residence", mode = "replace"), + list(tag = "SSN", field = "attribute_ssn", mode = "replace"), + list(tag = "TITL", field = "attribute_title", mode = "replace") + ) + out <- applyTagMappings.alpha(line, record, pattern_rows, attribute_mappings) + if (out$matched) { record <- out$record + i <- i + 1 + next } + + # Process relationship tags, using a custom extractor. + relationship_mappings <- list( + list(tag = "FAMC", field = "FAMC", mode = "append", + extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")), + list(tag = "FAMS", field = "FAMS", mode = "append", + extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")) + ) + out <- applyTagMappings.alpha(line, record, pattern_rows, relationship_mappings) + if (out$matched) { record <- out$record + i <- i + 1 + next } + + # Optionally print progress for long records. + i <- i + 1 + } + + # If the record has no ID, return NULL. + if (is.na(record$id)) return(NULL) + return(record) +} + +#' Parse a Full Name Line +#' +#' Extracts full name information from a GEDCOM "NAME" line and updates the record accordingly. +#' +#' @param line A character string containing the name line. +#' @param record A named list representing the individual's record. +#' @return The updated record with parsed name information. +parseNameLine.alpha <- function(line, record) { + record$name <- extract_info.alpha(line, "NAME") + record$name_given <- stringr::str_extract(record$name, ".*(?= /)") + record$name_surn <- stringr::str_extract(record$name, "(?<=/).*(?=/)") + record$name <- stringr::str_squish(stringr::str_replace(record$name, "/", " ")) + return(record) +} + +#' Process Event Lines (Birth or Death) +#' +#' Extracts event details (e.g., date, place, cause, latitude, longitude) from a block of GEDCOM lines. +#' For "birth": expect DATE on line i+1, PLAC on i+2, LATI on i+4, LONG on i+5. +#' For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +#' @param event A character string indicating the event type ("birth" or "death"). +#' @param block A character vector of GEDCOM lines. +#' @param i The current line index where the event tag is found. +#' @param record A named list representing the individual's record. +#' @param pattern_rows A list with counts of GEDCOM tag occurrences. +#' @return The updated record with parsed event information.# +# For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +processEventLine.alpha <- function(event, block, i, record, pattern_rows) { + n_lines <- length(block) + if (event == "birth") { + if (i + 1 <= n_lines) record$birth_date <- extract_info.alpha(block[i+1], "DATE") + if (i + 2 <= n_lines) record$birth_place <- extract_info.alpha(block[i+2], "PLAC") + if (i + 4 <= n_lines) record$birth_lat <- extract_info.alpha(block[i+4], "LATI") + if (i + 5 <= n_lines) record$birth_long <- extract_info.alpha(block[i+5], "LONG") + } else if (event == "death") { + if (i + 1 <= n_lines) record$death_date <- extract_info.alpha(block[i+1], "DATE") + if (i + 2 <= n_lines) record$death_place <- extract_info.alpha(block[i+2], "PLAC") + if (i + 3 <= n_lines) record$death_caus <- extract_info.alpha(block[i+3], "CAUS") + if (i + 4 <= n_lines) record$death_lat <- extract_info.alpha(block[i+4], "LATI") + if (i + 5 <= n_lines) record$death_long <- extract_info.alpha(block[i+5], "LONG") + } + return(record) +} + +#' Apply Tag Mappings to a Line +#' +#' Iterates over a list of tag mappings and, if a tag matches the line, updates the record. +#' +#' @param line A character string from the GEDCOM file. +#' @param record A named list representing the individual's record. +#' @param pattern_rows A list with GEDCOM tag counts. +#' @param tag_mappings A list of lists. Each sublist should define: +#' - \code{tag}: the GEDCOM tag, +#' - \code{field}: the record field to update, +#' - \code{mode}: either "replace" or "append", +#' - \code{extractor}: (optional) a custom extraction function. +#' @return A list with the updated record (\code{record}) and a logical flag (\code{matched}). +applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) { + for (mapping in tag_mappings) { + extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor + result <- process_tag.alpha(mapping$tag, mapping$field, pattern_rows, line, record, + extractor = extractor, mode = mapping$mode) + record <- result$vars + if (result$matched) { + return(list(record = record, matched = TRUE)) + } + } + return(list(record = record, matched = FALSE)) +} + + +#' Extract Information from Line +#' +#' This function extracts information from a line based on a specified type. +#' @param line A character string representing a line from a GEDCOM file. +#' @param type A character string representing the type of information to extract. +#' @return A character string with the extracted information. +#' @keywords internal +extract_info.alpha <- function(line, type) { + stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+"))) +} + +#' Count GEDCOM Pattern Rows +#' +#' Counts the number of lines in a file (passed as a data frame with column "X1") +#' that match various GEDCOM patterns. +#' +#' @param file A data frame with a column \code{X1} containing GEDCOM lines. +#' @return A list with counts of specific GEDCOM tag occurrences. +countPatternRows.alpha <- function(file) { + pattern_counts <- sapply( + c( + "@ INDI", " NAME", " GIVN", " NPFX", " NICK", " SURN", " NSFX", " _MARNM", + " BIRT", " DEAT", " SEX", " CAST", " DSCR", " EDUC", " IDNO", " NATI", + " NCHI", " NMR", " OCCU", " PROP", " RELI", " RESI", " SSN", " TITL", + " FAMC", " FAMS", " PLAC", " LATI", " LONG", " DATE", " CAUS" + ), + function(pat) sum(grepl(pat, file$X1)) + ) + num_rows <- list( + num_indi_rows = pattern_counts["@ INDI"], + num_name_rows = pattern_counts[" NAME"], + num_givn_rows = pattern_counts[" GIVN"], + num_npfx_rows = pattern_counts[" NPFX"], + num_nick_rows = pattern_counts[" NICK"], + num_surn_rows = pattern_counts[" SURN"], + num_nsfx_rows = pattern_counts[" NSFX"], + num_marnm_rows = pattern_counts[" _MARNM"], + num_birt_rows = pattern_counts[" BIRT"], + num_deat_rows = pattern_counts[" DEAT"], + num_sex_rows = pattern_counts[" SEX"], + num_cast_rows = pattern_counts[" CAST"], + num_dscr_rows = pattern_counts[" DSCR"], + num_educ_rows = pattern_counts[" EDUC"], + num_idno_rows = pattern_counts[" IDNO"], + num_nati_rows = pattern_counts[" NATI"], + num_nchi_rows = pattern_counts[" NCHI"], + num_nmr_rows = pattern_counts[" NMR"], + num_occu_rows = pattern_counts[" OCCU"], + num_prop_rows = pattern_counts[" PROP"], + num_reli_rows = pattern_counts[" RELI"], + num_resi_rows = pattern_counts[" RESI"], + num_ssn_rows = pattern_counts[" SSN"], + num_titl_rows = pattern_counts[" TITL"], + num_famc_rows = pattern_counts[" FAMC"], + num_fams_rows = pattern_counts[" FAMS"], + num_plac_rows = pattern_counts[" PLAC"], + num_lati_rows = pattern_counts[" LATI"], + num_long_rows = pattern_counts[" LONG"], + num_date_rows = pattern_counts[" DATE"], + num_caus_rows = pattern_counts[" CAUS"] + ) + return(num_rows) +} + +#' Process a GEDCOM Tag +#' +#' Extracts and assigns a value to a specified field in `vars` if the pattern is present. +#' Returns both the updated variable list and a flag indicating whether the tag was matched. +#' +#' @param tag The GEDCOM tag (e.g., "SEX", "CAST", etc.). +#' @param field_name The name of the variable to assign to in `vars`. +#' @param pattern_rows Output from `countPatternRows()`. +#' @param line The GEDCOM line to parse. +#' @param vars The current list of variables to update. +#' @return A list with updated `vars` and a `matched` flag. +#' @keywords internal +process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars, + extractor = NULL, mode = "replace") { + count_name <- paste0("num_", tolower(tag), "_rows") + matched <- FALSE + if (!is.null(pattern_rows[[count_name]]) && + pattern_rows[[count_name]] > 0 && + grepl(paste0(" ", tag), line)) { + value <- if (is.null(extractor)) { + extract_info.alpha(line, tag) + } else { + extractor(line) + } + if (mode == "append" && !is.na(vars[[field_name]])) { + vars[[field_name]] <- paste0(vars[[field_name]], ", ", value) + } else { + vars[[field_name]] <- value + } + matched <- TRUE + } + return(list(vars = vars, matched = matched)) +} + +#' Post-process GEDCOM Data Frame +#' +#' This function optionally adds parent information, combines duplicate columns, +#' and removes empty columns from the GEDCOM data frame. +#' +#' @param df_temp A data frame produced by \code{readGedcom()}. +#' @param remove_empty_cols Logical indicating whether to remove columns that are entirely missing. +#' @param combine_cols Logical indicating whether to combine columns with duplicate values. +#' @param add_parents Logical indicating whether to add parent information. +#' @param skinny Logical indicating whether to slim down the data frame. +#' @param verbose Logical indicating whether to print progress messages. +#' @return The post-processed data frame. +postProcessGedcom.alpha <- function(df_temp, + remove_empty_cols = TRUE, + combine_cols = TRUE, + add_parents = TRUE, + skinny = TRUE, + verbose = FALSE) { + if (add_parents) { + if (verbose) message("Processing parents") + df_temp <- processParents.alpha(df_temp, datasource = "gedcom") + } + if (combine_cols) { + df_temp <- collapseNames.alpha(verbose = verbose, df_temp = df_temp) + } + if (remove_empty_cols) { + if (verbose) message("Removing empty columns") + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] + } + if (skinny) { + if (verbose) message("Slimming down the data frame") + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] + df_temp$FAMC <- NULL + df_temp$FAMS <- NULL + } + return(df_temp) +} + +#' Process Parents Information from GEDCOM Data +#' +#' Adds parent IDs to the individuals based on family relationship data. +#' +#' @param df_temp A data frame produced by \code{readGedcom()}. +#' @param datasource Character string indicating the data source ("gedcom" or "wiki"). +#' @return The updated data frame with parent IDs added. +processParents.alpha <- function(df_temp, datasource) { + if (datasource == "gedcom") { + required_cols <- c("FAMC", "sex", "FAMS") + } else if (datasource == "wiki") { + required_cols <- c("id") + } else { + stop("Invalid datasource") + } + if (!all(required_cols %in% colnames(df_temp))) { + missing_cols <- setdiff(required_cols, colnames(df_temp)) + warning("Missing necessary columns: ", paste(missing_cols, collapse = ", ")) + return(df_temp) + } + family_to_parents <- mapFAMS2parents.alpha(df_temp) + if (is.null(family_to_parents) || length(family_to_parents) == 0) { + return(df_temp) + } + df_temp <- mapFAMC2parents.alpha(df_temp, family_to_parents) + return(df_temp) +} + +#' Create a Mapping from Family IDs to Parent IDs +#' +#' This function scans the data frame and creates a mapping of family IDs +#' to the corresponding parent IDs. +#' +#' @param df_temp A data frame produced by \code{readGedcom()}. +#' @return A list mapping family IDs to parent information. +mapFAMS2parents.alpha <- function(df_temp) { + if (!all(c("FAMS", "sex") %in% colnames(df_temp))) { + warning("The data frame does not contain the necessary columns (FAMS, sex)") + return(NULL) + } + family_to_parents <- list() + for (i in 1:nrow(df_temp)) { + if (!is.na(df_temp$FAMS[i])) { + fams_ids <- unlist(strsplit(df_temp$FAMS[i], ", ")) + for (fams_id in fams_ids) { + if (!is.null(family_to_parents[[fams_id]])) { + if (df_temp$sex[i] == "M") { + family_to_parents[[fams_id]]$father <- df_temp$id[i] + } else if (df_temp$sex[i] == "F") { + family_to_parents[[fams_id]]$mother <- df_temp$id[i] + } + } else { + family_to_parents[[fams_id]] <- list() + if (df_temp$sex[i] == "M") { + family_to_parents[[fams_id]]$father <- df_temp$id[i] + } else if (df_temp$sex[i] == "F") { + family_to_parents[[fams_id]]$mother <- df_temp$id[i] + } + } + } + } + } + return(family_to_parents) +} + +#' Assign momID and dadID based on family mapping +#' +#' This function assigns mother and father IDs to individuals in the data frame +#' based on the mapping of family IDs to parent IDs. +#' +#' @param df_temp A data frame containing individual information. +#' @param family_to_parents A list mapping family IDs to parent IDs. +#' @return A data frame with added momID and dad_ID columns. +#' @keywords internal +mapFAMC2parents.alpha <- function(df_temp, family_to_parents) { + df_temp$momID <- NA_character_ + df_temp$dadID <- NA_character_ + for (i in 1:nrow(df_temp)) { + if (!is.na(df_temp$FAMC[i])) { + famc_ids <- unlist(strsplit(df_temp$FAMC[i], ", ")) + for (famc_id in famc_ids) { + if (!is.null(family_to_parents[[famc_id]])) { + if (!is.null(family_to_parents[[famc_id]]$father)) { + df_temp$dadID[i] <- family_to_parents[[famc_id]]$father + } + if (!is.null(family_to_parents[[famc_id]]$mother)) { + df_temp$momID[i] <- family_to_parents[[famc_id]]$mother + } + } + } + } + } + return(df_temp) +} + +#' collapse Names +#' +#' This function combines the `name_given` and `name_given_pieces` columns in a data frame. +#' +#' @inheritParams readGedcom +#' @param df_temp A data frame containing the columns to be combined. +#' @return A data frame with the combined columns. +collapseNames.alpha <- function(verbose, df_temp) { + if (verbose) message("Combining Duplicate Columns") + + if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) { + result <- combine_columns.alpha(df_temp$name_given, df_temp$name_given_pieces) + df_temp$name_given <- result$combined + if (!result$retain_col2) df_temp$name_given_pieces <- NULL + } + + if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) { + result <- combine_columns.alpha(df_temp$name_surn, df_temp$name_surn_pieces) + df_temp$name_surn <- result$combined + if (!result$retain_col2) df_temp$name_surn_pieces <- NULL + } + return(df_temp) +} + +#' Combine Columns +#' +#' This function combines two columns, handling conflicts and merging non-conflicting data. +#' @param col1 The first column to combine. +#' @param col2 The second column to combine. +#' @return A list with the combined column and a flag indicating if the second column should be retained. +#' @keywords internal +# Helper function to check for conflicts and merge columns +combine_columns.alpha <- function(col1, col2) { + col1_lower <- stringr::str_to_lower(col1) + col2_lower <- stringr::str_to_lower(col2) + conflicts <- !is.na(col1_lower) & !is.na(col2_lower) & col1_lower != col2_lower + if (any(conflicts)) { + warning("Columns have conflicting values. They were not merged.") + return(list(combined = col1, retain_col2 = TRUE)) + } else { + combined <- ifelse(is.na(col1), col2, col1) + return(list(combined = combined, retain_col2 = FALSE)) + } +} + +# --- Exported Aliases --- +#' @rdname readGedcom.alpha +#' @export +readGed.alpha <- readGedcom.alpha +#' @rdname readGedcom.alpha +#' @export +readgedcom.alpha <- readGedcom.alpha diff --git a/man/compute_parent_adjacency.Rd b/man/compute_parent_adjacency.Rd index 9cd4311c..21974673 100644 --- a/man/compute_parent_adjacency.Rd +++ b/man/compute_parent_adjacency.Rd @@ -7,7 +7,7 @@ compute_parent_adjacency( ped, component, - adjacency_method = "indexed", + adjacency_method = "direct", saveable, resume, save_path, diff --git a/man/determineSex.Rd b/man/determineSex.Rd index c98644f6..c1c096af 100644 --- a/man/determineSex.Rd +++ b/man/determineSex.Rd @@ -4,7 +4,7 @@ \alias{determineSex} \title{Determine Sex of Offspring} \usage{ -determineSex(idGen, sexR) +determineSex(idGen, sexR, code_male = "M", code_female = "F") } \arguments{ \item{idGen}{Vector of IDs for the generation.} diff --git a/man/ped2com.Rd b/man/ped2com.Rd index 27f632f5..c47d5982 100644 --- a/man/ped2com.Rd +++ b/man/ped2com.Rd @@ -14,7 +14,7 @@ ped2com( flatten.diag = FALSE, standardize.colnames = TRUE, transpose_method = "tcrossprod", - adjacency_method = "indexed", + adjacency_method = "direct", isChild_method = "classic", saveable = FALSE, resume = FALSE, diff --git a/man/postProcessGedcom.Rd b/man/postProcessGedcom.Rd new file mode 100644 index 00000000..9d0c7b2c --- /dev/null +++ b/man/postProcessGedcom.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{postProcessGedcom} +\alias{postProcessGedcom} +\title{Post-process GEDCOM Data Frame} +\usage{ +postProcessGedcom( + df_temp, + remove_empty_cols = TRUE, + combine_cols = TRUE, + add_parents = TRUE, + skinny = TRUE, + verbose = FALSE +) +} +\arguments{ +\item{df_temp}{A data frame containing information about individuals.} + +\item{remove_empty_cols}{A logical value indicating whether to remove columns with all missing values.} + +\item{combine_cols}{A logical value indicating whether to combine columns with duplicate values.} + +\item{add_parents}{A logical value indicating whether to add parents to the data frame.} + +\item{skinny}{A logical value indicating whether to return a skinny data frame.} + +\item{verbose}{A logical value indicating whether to print messages.} +} +\value{ +A data frame with processed information. +} +\description{ +Post-process GEDCOM Data Frame +} diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index 7bab49b1..a54cd5aa 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -14,6 +14,7 @@ readGedcom( combine_cols = TRUE, skinny = FALSE, update_rate = 1000, + post_process = TRUE, ... ) @@ -25,6 +26,7 @@ readGed( combine_cols = TRUE, skinny = FALSE, update_rate = 1000, + post_process = TRUE, ... ) @@ -36,6 +38,7 @@ readgedcom( combine_cols = TRUE, skinny = FALSE, update_rate = 1000, + post_process = TRUE, ... ) } diff --git a/tests/testthat/test-readPedigrees_alpha.R b/tests/testthat/test-readPedigrees_alpha.R new file mode 100644 index 00000000..4a5a4e98 --- /dev/null +++ b/tests/testthat/test-readPedigrees_alpha.R @@ -0,0 +1,231 @@ +test_that("readGedcom.alpha reads and parses a GEDCOM file correctly", { + # Create a temporary GEDCOM file for testing + gedcom_content <- c( + "0 HEAD", + "1 GEDC", + "2 VERS 5.5", + "2 FORM LINEAGE-LINKED", + "1 CHAR UTF-8", + "1 LANG English", + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 SEX M", + "1 BIRT", + "2 DATE 1 JAN 1900", + "2 PLAC Someplace", + "0 @I2@ INDI", + "1 NAME Jane /Smith/", + "1 SEX F", + "1 BIRT", + "2 DATE 2 FEB 1910", + "2 PLAC Anotherplace", + "1 NCHI 2" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + # Call readGedcom.alpha + df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = FALSE) + # note to self, the code is not reading in the 2nd person. and is also not reading in the birth date and place + # Check that the data frame has the expected structure + expect_true("id" %in% colnames(df)) + expect_true("name_given" %in% colnames(df)) + expect_true("name_surn" %in% colnames(df)) + expect_true("sex" %in% colnames(df)) + expect_true("birth_date" %in% colnames(df)) + expect_true("birth_place" %in% colnames(df)) + + # Check the contents of the data frame + expect_equal(nrow(df), 2) + expect_equal(df$name_given[1], "John") + expect_equal(df$name_surn[1], "Doe") + expect_equal(df$sex[1], "M") + expect_equal(df$birth_date[1], "1 JAN 1900") + expect_equal(df$birth_place[1], "Someplace") + expect_equal(df$name_given[2], "Jane") + expect_equal(df$name_surn[2], "Smith") + expect_equal(df$sex[2], "F") + expect_equal(df$birth_date[2], "2 FEB 1910") + expect_equal(df$birth_place[2], "Anotherplace") + + # Clean up temporary file + unlink(temp_file) +}) + +test_that("readGedcom.alpha combines duplicate columns correctly", { + # Create a temporary GEDCOM file for testing + gedcom_content <- c( + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 GIVN John", + "1 SEX M", + "0 @I2@ INDI", + "1 NAME Jane /Smith/", + "1 GIVN Jane", + "1 SEX F" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + # Call readGedcom.alpha with combine_cols = TRUE + df <- readGedcom.alpha(temp_file, verbose = TRUE, combine_cols = TRUE) + + # Check that the data frame has the expected structure + expect_true("name_given" %in% colnames(df)) + expect_false("name_given_pieces" %in% colnames(df)) + + # Check the contents of the data frame + expect_equal(nrow(df), 2) + expect_equal(df$name_given[1], "John") + expect_equal(df$name_given[2], "Jane") + + # Clean up temporary file + unlink(temp_file) +}) + +test_that("readGedcom.alpha removes empty columns correctly", { + # Create a temporary GEDCOM file for testing + gedcom_content <- c( + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 SEX M" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + # Call readGedcom.alpha with remove_empty_cols = TRUE + df <- readGedcom.alpha(temp_file, verbose = TRUE, remove_empty_cols = TRUE) + + # Check that empty columns are removed + expect_false("birth_date" %in% colnames(df)) + expect_false("birth_place" %in% colnames(df)) + + # Clean up temporary file + unlink(temp_file) +}) + +test_that("readGedcom.alpha handles skinny option correctly", { + # Create a temporary GEDCOM file for testing + gedcom_content <- c( + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 SEX M", + "1 FAMC @F1@", + "1 FAMS @F2@" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + # Call readGedcom.alpha with skinny = TRUE + df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = TRUE) + + # Check that FAMC and FAMS columns are removed + expect_false("FAMC" %in% colnames(df)) + expect_false("FAMS" %in% colnames(df)) + + # Clean up temporary file + unlink(temp_file) +}) + +test_that("processParents.alpha adds momID and dadID correctly", { + # Create a data frame for testing + df_temp <- data.frame( + id = c("I1", "I2", "I3"), + sex = c("M", "F", "M"), + FAMS = c("@F1@", "@F1@", NA), + FAMC = c(NA, NA, "@F1@"), + stringsAsFactors = FALSE + ) + + # Call processParents.alpha + df_temp <- processParents.alpha(df_temp, datasource = "gedcom") + + # Check the structure of the data frame + expect_true("momID" %in% colnames(df_temp)) + expect_true("dadID" %in% colnames(df_temp)) + + # Check the contents of the data frame + expect_equal(df_temp$momID[1], NA_character_) + expect_equal(df_temp$dadID[1], NA_character_) + expect_equal(df_temp$momID[2], NA_character_) + expect_equal(df_temp$dadID[2], NA_character_) + expect_equal(df_temp$momID[3], "I2") + expect_equal(df_temp$dadID[3], "I1") + + # Create a more complex data frame for testing + df_temp <- data.frame( + id = c("I1", "I2", "I3", "I4", "I5"), + sex = c("M", "F", "M", "F", "M"), + FAMS = c("@F1@", "@F1@", "@F2@", "@F2@", "@F3@"), + FAMC = c(NA, NA, "@F1@", "@F1@", "@F2@"), + stringsAsFactors = FALSE + ) + + # Call processParents.alpha + df_temp <- processParents.alpha(df_temp, datasource = "gedcom") + + # Check the contents of the data frame + expect_equal(df_temp$momID[3], "I2") + expect_equal(df_temp$dadID[3], "I1") + expect_equal(df_temp$momID[4], "I2") + expect_equal(df_temp$dadID[4], "I1") + expect_equal(df_temp$momID[5], "I4") + expect_equal(df_temp$dadID[5], "I3") +}) + +test_that("if file does not exist, readGedcom.alpha throws an error", { + # Call readGedcom.alpha with a non-existent file + expect_error(readGedcom.alpha("nonexistent.ged")) +}) + +test_that("readGedcom.alpha parses death event correctly", { + # Test that a GEDCOM file with a death event is parsed correctly. + gedcom_content <- c( + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 SEX M", + "1 DEAT", + "2 DATE 31 DEC 2000", + "2 PLAC Lastplace", + "2 CAUS Old age", + "2 LATI 12.3456", + "2 LONG -65.4321" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + df <- readGedcom.alpha(temp_file, verbose = TRUE) + + expect_true("death_date" %in% colnames(df)) + expect_true("death_place" %in% colnames(df)) + expect_true("death_caus" %in% colnames(df)) + expect_true("death_lat" %in% colnames(df)) + expect_true("death_long" %in% colnames(df)) + + expect_equal(df$death_date[1], "31 DEC 2000") + expect_equal(df$death_place[1], "Lastplace") + expect_equal(df$death_caus[1], "Old age") + expect_equal(df$death_lat[1], "12.3456") + expect_equal(df$death_long[1], "-65.4321") + + unlink(temp_file) +}) + +test_that("readGedcom.alpha handles incomplete individual records gracefully", { + # Test that an individual record missing a NAME line is handled without error. + gedcom_content <- c( + "0 @I1@ INDI", + "1 SEX M" + # No NAME or BIRT information. + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + + df <- readGedcom.alpha(temp_file, verbose = TRUE) + + # Expect one record with missing name fields. + expect_equal(nrow(df), 1) + expect_true(is.null(df$name[1])) + + unlink(temp_file) +}) From 2541415bb113667cb2b7cedbd0a1e881a2077bfc Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 19:46:46 -0400 Subject: [PATCH 25/35] Update readGedcom.R --- R/readGedcom.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/readGedcom.R b/R/readGedcom.R index f271632c..8d800a66 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -48,7 +48,7 @@ #' - `FAMC`: ID(s) of the family where the individual is a child #' - `FAMS`: ID(s) of the family where the individual is a spouse #' @export -readGedcom <- function(file_path, +readGedcom.legacy <- function(file_path, verbose = FALSE, add_parents = TRUE, remove_empty_cols = TRUE, From 65affa3dd2f09262798d3632cf5b1b591c5f5b04 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 19:51:32 -0400 Subject: [PATCH 26/35] rename --- R/{readGedcom.R => readGedcomlegacy.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{readGedcom.R => readGedcomlegacy.R} (100%) diff --git a/R/readGedcom.R b/R/readGedcomlegacy.R similarity index 100% rename from R/readGedcom.R rename to R/readGedcomlegacy.R From 1ee34bab6060cf75177645a8a75edc9e70082526 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 19:51:58 -0400 Subject: [PATCH 27/35] rename --- R/{readGedcom_alpha.R => readGedcom.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{readGedcom_alpha.R => readGedcom.R} (100%) diff --git a/R/readGedcom_alpha.R b/R/readGedcom.R similarity index 100% rename from R/readGedcom_alpha.R rename to R/readGedcom.R From 294186ba69f2a6c1d3e23e9af4792071ee48d527 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 19:59:28 -0400 Subject: [PATCH 28/35] Update readGedcomlegacy.R --- R/readGedcomlegacy.R | 84 ++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R index 8d800a66..b8697908 100644 --- a/R/readGedcomlegacy.R +++ b/R/readGedcomlegacy.R @@ -47,7 +47,7 @@ #' - `attribute_title`: Title of the individual #' - `FAMC`: ID(s) of the family where the individual is a child #' - `FAMS`: ID(s) of the family where the individual is a spouse -#' @export +#' @internal readGedcom.legacy <- function(file_path, verbose = FALSE, add_parents = TRUE, @@ -69,7 +69,7 @@ readGedcom.legacy <- function(file_path, } # Count the number of rows containing specific patterns - num_rows <- countPatternRows(file) + num_rows <- countPatternRows.legacy(file) # List of variables to initialize var_names <- list( @@ -128,51 +128,51 @@ readGedcom.legacy <- function(file_path, # names if (num_rows$num_name_rows > 0 && grepl(" NAME", tmpv)) { - vars$name <- extract_info(tmpv, "NAME") + vars$name <- extract_info.legacy(tmpv, "NAME") vars$name_given <- stringr::str_extract(vars$name, ".*(?= /)") vars$name_surn <- stringr::str_extract(vars$name, "(?<=/).*(?=/)") vars$name <- stringr::str_squish(stringr::str_replace(vars$name, "/", " ")) next } # PERSONAL_NAME_PIECES := NAME | NPFX | GIVN | NICK | SPFX | SURN | NSFX - result <- process_tag("GIVN", "name_given_pieces", num_rows, tmpv, vars) + result <- process_tag.legacy("GIVN", "name_given_pieces", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next # npfx := Name Prefix - result <- process_tag("NPFX", "name_npfx", num_rows, tmpv, vars) + result <- process_tag.legacy("NPFX", "name_npfx", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next # NICK := Nickname - result <- process_tag("NICK", "name_nick", num_rows, tmpv, vars) + result <- process_tag.legacy("NICK", "name_nick", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next # surn := Surname - result <- process_tag("SURN", "name_surn_pieces", num_rows, tmpv, vars) + result <- process_tag.legacy("SURN", "name_surn_pieces", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next # nsfx := Name suffix - result <- process_tag("NSFX", "name_nsfx", num_rows, tmpv, vars) + result <- process_tag.legacy("NSFX", "name_nsfx", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next - result <- process_tag("_MARNM", "name_marriedsurn", num_rows, tmpv, vars) + result <- process_tag.legacy("_MARNM", "name_marriedsurn", num_rows, tmpv, vars) vars <- result$vars if (result$matched) next # Birth event related information if (num_rows$num_birt_rows > 0 && grepl(" BIRT", tmpv)) { if (num_rows$num_date_rows > 0 && i + 1 <= file_length) { - vars$birth_date <- extract_info(file[1][[1]][[i + 1]], "DATE") + vars$birth_date <- extract_info.legacy(file[1][[1]][[i + 1]], "DATE") if (num_rows$num_plac_rows > 0 && i + 2 <= file_length) { - vars$birth_place <- extract_info(file[1][[1]][[i + 2]], "PLAC") + vars$birth_place <- extract_info.legacy(file[1][[1]][[i + 2]], "PLAC") if (num_rows$num_lati_rows > 0 && i + 4 <= file_length) { - vars$birth_lat <- extract_info(file[1][[1]][[i + 4]], "LATI") + vars$birth_lat <- extract_info.legacy(file[1][[1]][[i + 4]], "LATI") if (num_rows$num_long_rows > 0 && i + 5 <= file_length) { - vars$birth_long <- extract_info(file[1][[1]][[i + 5]], "LONG") + vars$birth_long <- extract_info.legacy(file[1][[1]][[i + 5]], "LONG") } } } @@ -184,15 +184,15 @@ readGedcom.legacy <- function(file_path, # the ifs are nested so that there is no need to check if you've already run out of if (num_rows$num_deat_rows > 0 && grepl(" DEAT", tmpv)) { if (num_rows$num_date_rows > 0 && i + 1 <= file_length) { - vars$death_date <- extract_info(file[1][[1]][[i + 1]], "DATE") + vars$death_date <- extract_info.legacy(file[1][[1]][[i + 1]], "DATE") if (num_rows$num_plac_rows > 0 && i + 2 <= file_length) { - vars$death_place <- extract_info(file[1][[1]][[i + 2]], "PLAC") + vars$death_place <- extract_info.legacy(file[1][[1]][[i + 2]], "PLAC") if (num_rows$num_caus_rows > 0 && i + 3 <= file_length) { - vars$death_caus <- extract_info(file[1][[1]][[i + 3]], "CAUS") + vars$death_caus <- extract_info.legacy(file[1][[1]][[i + 3]], "CAUS") if (num_rows$num_lati_rows > 0 && i + 4 <= file_length) { - vars$death_lat <- extract_info(file[1][[1]][[i + 4]], "LATI") + vars$death_lat <- extract_info.legacy(file[1][[1]][[i + 4]], "LATI") if (num_rows$num_long_rows > 0 && i + 5 <= file_length) { - vars$death_long <- extract_info(file[1][[1]][[i + 5]], "LONG") + vars$death_long <- extract_info.legacy(file[1][[1]][[i + 5]], "LONG") } } } @@ -258,7 +258,7 @@ readGedcom.legacy <- function(file_path, # g7:INDI-TITL A formal designation used by an individual in connection with positions of royalty or other social status, such as Grand Duke. c("TITL", "attribute_title") )) { - result <- process_tag(tag_field[1], tag_field[2], num_rows, tmpv, vars) + result <- process_tag.legacy(tag_field[1], tag_field[2], num_rows, tmpv, vars) vars <- result$vars if (result$matched) next } @@ -266,7 +266,7 @@ readGedcom.legacy <- function(file_path, # relationship data # g7:INDI-FAMC ## The family in which an individual appears as a child. It is also used with a g7:FAMC-STAT substructure to show individuals who are not children of the family. See FAMILY_RECORD for more details. - result <- process_tag("FAMC", "FAMC", num_rows, tmpv, vars, + result <- process_tag.legacy("FAMC", "FAMC", num_rows, tmpv, vars, extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"), mode = "append" ) @@ -275,7 +275,7 @@ readGedcom.legacy <- function(file_path, # FAMS (Family spouse) g7:FAMS # The family in which an individual appears as a partner. See FAMILY_RECORD for more details. - result <- process_tag("FAMS", "FAMS", num_rows, tmpv, vars, + result <- process_tag.legacy("FAMS", "FAMS", num_rows, tmpv, vars, extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)"), mode = "append" ) @@ -307,7 +307,7 @@ readGedcom.legacy <- function(file_path, print("Post-processing data frame") } # Remove the first row (empty) -df_temp <- postProcessGedcom( +df_temp <- postProcessGedcom.legacy( df_temp = df_temp, remove_empty_cols = remove_empty_cols, combine_cols = combine_cols, @@ -323,11 +323,11 @@ df_temp <- postProcessGedcom( #' Post-process GEDCOM Data Frame #' -#' @inheritParams readGedcom -#' @inheritParams mapFAMS2parents +#' @inheritParams readGedcom.legacy +#' @inheritParams mapFAMS2parents.legacy #' @return A data frame with processed information. -postProcessGedcom <- function(df_temp, +postProcessGedcom.legacy <- function(df_temp, remove_empty_cols = TRUE, combine_cols = TRUE, add_parents = TRUE, @@ -339,11 +339,11 @@ postProcessGedcom <- function(df_temp, if (verbose) { print("Processing parents") } - df_temp <- processParents(df_temp, datasource = "gedcom") + df_temp <- processParents.legacy(df_temp, datasource = "gedcom") } if (combine_cols) { - df_temp <- collapseNames(verbose = verbose, df_temp = df_temp) + df_temp <- collapseNames.legacy(verbose = verbose, df_temp = df_temp) } if (remove_empty_cols) { @@ -374,7 +374,7 @@ return(df_temp) #' @return A list mapping family IDs to parent IDs. #' @keywords internal #' -mapFAMS2parents <- function(df_temp) { +mapFAMS2parents.legacy <- function(df_temp) { if (!all(c("FAMS", "sex") %in% colnames(df_temp))) { warning("The data frame does not contain the necessary columns (FAMS, sex)") return(NULL) @@ -413,7 +413,7 @@ mapFAMS2parents <- function(df_temp) { #' @param family_to_parents A list mapping family IDs to parent IDs. #' @return A data frame with added momID and dad_ID columns. #' @keywords internal -mapFAMC2parents <- function(df_temp, family_to_parents) { +mapFAMC2parents.legacy <- function(df_temp, family_to_parents) { df_temp$momID <- NA_character_ df_temp$dadID <- NA_character_ for (i in 1:nrow(df_temp)) { @@ -441,7 +441,7 @@ mapFAMC2parents <- function(df_temp, family_to_parents) { #' @param df_temp A data frame containing information about individuals. #' @return A data frame with added momID and dadID columns. #' @keywords internal -processParents <- function(df_temp, datasource) { +processParents.legacy <- function(df_temp, datasource) { # Ensure required columns are present if (datasource == "gedcom") { required_cols <- c("FAMC", "sex", "FAMS") @@ -457,11 +457,11 @@ processParents <- function(df_temp, datasource) { return(df_temp) } - family_to_parents <- mapFAMS2parents(df_temp) + family_to_parents <- mapFAMS2parents.legacy(df_temp) if (is.null(family_to_parents) || length(family_to_parents) == 0) { return(df_temp) } - df_temp <- mapFAMC2parents(df_temp, family_to_parents) + df_temp <- mapFAMC2parents.legacy(df_temp, family_to_parents) return(df_temp) } @@ -474,7 +474,7 @@ processParents <- function(df_temp, datasource) { #' @param type A character string representing the type of information to extract. #' @return A character string with the extracted information. #' @keywords internal -extract_info <- function(line, type) { +extract_info.legacy <- function(line, type) { stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+"))) } @@ -509,7 +509,7 @@ combine_columns <- function(col1, col2) { #' @return A list with the number of rows containing each pattern. #' @keywords internal #' -countPatternRows <- function(file) { +countPatternRows.legacy <- function(file) { # Count the number of rows containing specific patterns pattern_counts <- sapply( c( @@ -569,14 +569,14 @@ countPatternRows <- function(file) { #' @return A list with updated `vars` and a `matched` flag. #' @keywords internal #' -process_tag <- function(tag, field_name, pattern_rows, line, vars, +process_tag.legacy <- function(tag, field_name, pattern_rows, line, vars, extractor = NULL, mode = "replace") { count_name <- paste0("num_", tolower(tag), "_rows") matched <- FALSE if (!is.null(pattern_rows[[count_name]]) && pattern_rows[[count_name]] > 0 && grepl(paste0(" ", tag), line)) { - value <- if (is.null(extractor)) extract_info(line, tag) else extractor(line) + value <- if (is.null(extractor)) extract_info.legacy(line, tag) else extractor(line) if (mode == "append" && !is.na(vars[[field_name]])) { vars[[field_name]] <- paste0(vars[[field_name]], ", ", value) @@ -593,10 +593,10 @@ process_tag <- function(tag, field_name, pattern_rows, line, vars, #' #' This function combines the `name_given` and `name_given_pieces` columns in a data frame. #' -#' @inheritParams readGedcom +#' @inheritParams readGedcom.legacy #' @param df_temp A data frame containing the columns to be combined. - -collapseNames <- function(verbose, df_temp) { +#' @keywords internal +collapseNames.legacy <- function(verbose, df_temp) { if (verbose) { print("Combining Duplicate Columns") } @@ -622,10 +622,4 @@ collapseNames <- function(verbose, df_temp) { } return(df_temp) } -#' @rdname readGedcom -#' @export -readGed <- readGedcom -#' @rdname readGedcom -#' @export -readgedcom <- readGedcom From 8244984df4aead2370965479be4bc3ea7afedf7f Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 20:05:14 -0400 Subject: [PATCH 29/35] updating tests --- tests/testthat/test-readPedigrees.R | 73 ++++--- tests/testthat/test-readPedigrees_alpha.R | 245 +++------------------- 2 files changed, 70 insertions(+), 248 deletions(-) diff --git a/tests/testthat/test-readPedigrees.R b/tests/testthat/test-readPedigrees.R index a6d6bdd0..b48bf74a 100644 --- a/tests/testthat/test-readPedigrees.R +++ b/tests/testthat/test-readPedigrees.R @@ -180,44 +180,55 @@ test_that("if file does not exist, readGedcom throws an error", { -# readWikifamilytree - -test_that("readWikifamilytree reads a string correctly", { - # Create a temporary WikiFamilyTree file for testing - # Example usage - family_tree_text <- "{{familytree/start |summary=I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy.}} -{{familytree | | | | GMa |~|y|~| GPa | | GMa=Gladys|GPa=Sydney}} -{{familytree | | | | | | | |)|-|-|-|.| }} -{{familytree | | | MOM |y| DAD | |DAISY| MOM=Mom|DAD=Dad|DAISY=[[Daisy Duke]]}} -{{familytree | |,|-|-|-|+|-|-|-|.| | | }} -{{familytree | JOE | | ME | | SIS | | | JOE=My brother Joe|ME='''Me!'''|SIS=My little sister}} -{{familytree/end}}" +test_that("readGedcom parses death event correctly", { + # Test that a GEDCOM file with a death event is parsed correctly. + gedcom_content <- c( + "0 @I1@ INDI", + "1 NAME John /Doe/", + "1 SEX M", + "1 DEAT", + "2 DATE 31 DEC 2000", + "2 PLAC Lastplace", + "2 CAUS Old age", + "2 LATI 12.3456", + "2 LONG -65.4321" + ) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) - temp_file <- tempfile(fileext = ".txt") - writeLines(family_tree_text, temp_file) + df <- readGedcom(temp_file, verbose = TRUE) + expect_true("death_date" %in% colnames(df)) + expect_true("death_place" %in% colnames(df)) + expect_true("death_caus" %in% colnames(df)) + expect_true("death_lat" %in% colnames(df)) + expect_true("death_long" %in% colnames(df)) - result <- readWikifamilytree(text = family_tree_text) - result2 <- readWikifamilytree(file_path = temp_file) + expect_equal(df$death_date[1], "31 DEC 2000") + expect_equal(df$death_place[1], "Lastplace") + expect_equal(df$death_caus[1], "Old age") + expect_equal(df$death_lat[1], "12.3456") + expect_equal(df$death_long[1], "-65.4321") - expect_equal( - result$summary, - "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy." - ) + unlink(temp_file) +}) - expect_equal( - result2$summary, - "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy." +test_that("readGedcom handles incomplete individual records gracefully", { + # Test that an individual record missing a NAME line is handled without error. + gedcom_content <- c( + "0 @I1@ INDI", + "1 SEX M" + # No NAME or BIRT information. ) -}) + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_content, temp_file) + df <- readGedcom(temp_file, verbose = TRUE) -# read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt + # Expect one record with missing name fields. + expect_equal(nrow(df), 1) + expect_true(is.null(df$name[1])) -# test_that("readWikifamilytree reads a file correctly", { -# Create a temporary WikiFamilyTree file for testing -# Example usage -# family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc") + unlink(temp_file) +}) -# result <- readWikifamilytree(file_path=family_tree_file_path) -# }) diff --git a/tests/testthat/test-readPedigrees_alpha.R b/tests/testthat/test-readPedigrees_alpha.R index 4a5a4e98..d73e5810 100644 --- a/tests/testthat/test-readPedigrees_alpha.R +++ b/tests/testthat/test-readPedigrees_alpha.R @@ -1,231 +1,42 @@ -test_that("readGedcom.alpha reads and parses a GEDCOM file correctly", { - # Create a temporary GEDCOM file for testing - gedcom_content <- c( - "0 HEAD", - "1 GEDC", - "2 VERS 5.5", - "2 FORM LINEAGE-LINKED", - "1 CHAR UTF-8", - "1 LANG English", - "0 @I1@ INDI", - "1 NAME John /Doe/", - "1 SEX M", - "1 BIRT", - "2 DATE 1 JAN 1900", - "2 PLAC Someplace", - "0 @I2@ INDI", - "1 NAME Jane /Smith/", - "1 SEX F", - "1 BIRT", - "2 DATE 2 FEB 1910", - "2 PLAC Anotherplace", - "1 NCHI 2" - ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) - - # Call readGedcom.alpha - df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = FALSE) - # note to self, the code is not reading in the 2nd person. and is also not reading in the birth date and place - # Check that the data frame has the expected structure - expect_true("id" %in% colnames(df)) - expect_true("name_given" %in% colnames(df)) - expect_true("name_surn" %in% colnames(df)) - expect_true("sex" %in% colnames(df)) - expect_true("birth_date" %in% colnames(df)) - expect_true("birth_place" %in% colnames(df)) - # Check the contents of the data frame - expect_equal(nrow(df), 2) - expect_equal(df$name_given[1], "John") - expect_equal(df$name_surn[1], "Doe") - expect_equal(df$sex[1], "M") - expect_equal(df$birth_date[1], "1 JAN 1900") - expect_equal(df$birth_place[1], "Someplace") - expect_equal(df$name_given[2], "Jane") - expect_equal(df$name_surn[2], "Smith") - expect_equal(df$sex[2], "F") - expect_equal(df$birth_date[2], "2 FEB 1910") - expect_equal(df$birth_place[2], "Anotherplace") +# readWikifamilytree - # Clean up temporary file - unlink(temp_file) -}) - -test_that("readGedcom.alpha combines duplicate columns correctly", { - # Create a temporary GEDCOM file for testing - gedcom_content <- c( - "0 @I1@ INDI", - "1 NAME John /Doe/", - "1 GIVN John", - "1 SEX M", - "0 @I2@ INDI", - "1 NAME Jane /Smith/", - "1 GIVN Jane", - "1 SEX F" - ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) +test_that("readWikifamilytree reads a string correctly", { + # Create a temporary WikiFamilyTree file for testing + # Example usage + family_tree_text <- "{{familytree/start |summary=I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy.}} +{{familytree | | | | GMa |~|y|~| GPa | | GMa=Gladys|GPa=Sydney}} +{{familytree | | | | | | | |)|-|-|-|.| }} +{{familytree | | | MOM |y| DAD | |DAISY| MOM=Mom|DAD=Dad|DAISY=[[Daisy Duke]]}} +{{familytree | |,|-|-|-|+|-|-|-|.| | | }} +{{familytree | JOE | | ME | | SIS | | | JOE=My brother Joe|ME='''Me!'''|SIS=My little sister}} +{{familytree/end}}" - # Call readGedcom.alpha with combine_cols = TRUE - df <- readGedcom.alpha(temp_file, verbose = TRUE, combine_cols = TRUE) + temp_file <- tempfile(fileext = ".txt") + writeLines(family_tree_text, temp_file) - # Check that the data frame has the expected structure - expect_true("name_given" %in% colnames(df)) - expect_false("name_given_pieces" %in% colnames(df)) - # Check the contents of the data frame - expect_equal(nrow(df), 2) - expect_equal(df$name_given[1], "John") - expect_equal(df$name_given[2], "Jane") + result <- readWikifamilytree(text = family_tree_text) + result2 <- readWikifamilytree(file_path = temp_file) - # Clean up temporary file - unlink(temp_file) -}) - -test_that("readGedcom.alpha removes empty columns correctly", { - # Create a temporary GEDCOM file for testing - gedcom_content <- c( - "0 @I1@ INDI", - "1 NAME John /Doe/", - "1 SEX M" + expect_equal( + result$summary, + "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy." ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) - - # Call readGedcom.alpha with remove_empty_cols = TRUE - df <- readGedcom.alpha(temp_file, verbose = TRUE, remove_empty_cols = TRUE) - - # Check that empty columns are removed - expect_false("birth_date" %in% colnames(df)) - expect_false("birth_place" %in% colnames(df)) - # Clean up temporary file - unlink(temp_file) -}) - -test_that("readGedcom.alpha handles skinny option correctly", { - # Create a temporary GEDCOM file for testing - gedcom_content <- c( - "0 @I1@ INDI", - "1 NAME John /Doe/", - "1 SEX M", - "1 FAMC @F1@", - "1 FAMS @F2@" + expect_equal( + result2$summary, + "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy." ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) - - # Call readGedcom.alpha with skinny = TRUE - df <- readGedcom.alpha(temp_file, verbose = TRUE, skinny = TRUE) - - # Check that FAMC and FAMS columns are removed - expect_false("FAMC" %in% colnames(df)) - expect_false("FAMS" %in% colnames(df)) - - # Clean up temporary file - unlink(temp_file) }) -test_that("processParents.alpha adds momID and dadID correctly", { - # Create a data frame for testing - df_temp <- data.frame( - id = c("I1", "I2", "I3"), - sex = c("M", "F", "M"), - FAMS = c("@F1@", "@F1@", NA), - FAMC = c(NA, NA, "@F1@"), - stringsAsFactors = FALSE - ) - - # Call processParents.alpha - df_temp <- processParents.alpha(df_temp, datasource = "gedcom") - # Check the structure of the data frame - expect_true("momID" %in% colnames(df_temp)) - expect_true("dadID" %in% colnames(df_temp)) +# read E:/Dropbox/Lab/Research/Projects/2024/BGMiscJoss/BGmisc_main/data-raw/Targaryen tree Dance.txt - # Check the contents of the data frame - expect_equal(df_temp$momID[1], NA_character_) - expect_equal(df_temp$dadID[1], NA_character_) - expect_equal(df_temp$momID[2], NA_character_) - expect_equal(df_temp$dadID[2], NA_character_) - expect_equal(df_temp$momID[3], "I2") - expect_equal(df_temp$dadID[3], "I1") - - # Create a more complex data frame for testing - df_temp <- data.frame( - id = c("I1", "I2", "I3", "I4", "I5"), - sex = c("M", "F", "M", "F", "M"), - FAMS = c("@F1@", "@F1@", "@F2@", "@F2@", "@F3@"), - FAMC = c(NA, NA, "@F1@", "@F1@", "@F2@"), - stringsAsFactors = FALSE - ) +# test_that("readWikifamilytree reads a file correctly", { +# Create a temporary WikiFamilyTree file for testing +# Example usage +# family_tree_file_path <- "data-raw/Targaryen tree Dance.txt" # system.file("extdata", "Targaryen tree Dance.txt", package = "BGmisc") - # Call processParents.alpha - df_temp <- processParents.alpha(df_temp, datasource = "gedcom") - - # Check the contents of the data frame - expect_equal(df_temp$momID[3], "I2") - expect_equal(df_temp$dadID[3], "I1") - expect_equal(df_temp$momID[4], "I2") - expect_equal(df_temp$dadID[4], "I1") - expect_equal(df_temp$momID[5], "I4") - expect_equal(df_temp$dadID[5], "I3") -}) - -test_that("if file does not exist, readGedcom.alpha throws an error", { - # Call readGedcom.alpha with a non-existent file - expect_error(readGedcom.alpha("nonexistent.ged")) -}) - -test_that("readGedcom.alpha parses death event correctly", { - # Test that a GEDCOM file with a death event is parsed correctly. - gedcom_content <- c( - "0 @I1@ INDI", - "1 NAME John /Doe/", - "1 SEX M", - "1 DEAT", - "2 DATE 31 DEC 2000", - "2 PLAC Lastplace", - "2 CAUS Old age", - "2 LATI 12.3456", - "2 LONG -65.4321" - ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) - - df <- readGedcom.alpha(temp_file, verbose = TRUE) - - expect_true("death_date" %in% colnames(df)) - expect_true("death_place" %in% colnames(df)) - expect_true("death_caus" %in% colnames(df)) - expect_true("death_lat" %in% colnames(df)) - expect_true("death_long" %in% colnames(df)) - - expect_equal(df$death_date[1], "31 DEC 2000") - expect_equal(df$death_place[1], "Lastplace") - expect_equal(df$death_caus[1], "Old age") - expect_equal(df$death_lat[1], "12.3456") - expect_equal(df$death_long[1], "-65.4321") - - unlink(temp_file) -}) - -test_that("readGedcom.alpha handles incomplete individual records gracefully", { - # Test that an individual record missing a NAME line is handled without error. - gedcom_content <- c( - "0 @I1@ INDI", - "1 SEX M" - # No NAME or BIRT information. - ) - temp_file <- tempfile(fileext = ".ged") - writeLines(gedcom_content, temp_file) - - df <- readGedcom.alpha(temp_file, verbose = TRUE) - - # Expect one record with missing name fields. - expect_equal(nrow(df), 1) - expect_true(is.null(df$name[1])) - - unlink(temp_file) -}) +# result <- readWikifamilytree(file_path=family_tree_file_path) +# }) From ff6947316bc14253ec2b2633904745f718058092 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 14 Apr 2025 20:07:02 -0400 Subject: [PATCH 30/35] rename --- R/readGedcom.R | 98 +++++++++--------- R/readGedcomlegacy.R | 2 +- data-raw/benchged.R | 31 ++++++ data/royal92.rda | Bin 69068 -> 67612 bytes man/applyTagMappings.Rd | 27 +++++ man/collapseNames.Rd | 3 + man/collapseNames.legacy.Rd | 17 +++ man/combine_columns.Rd | 8 +- man/countPatternRows.Rd | 10 +- man/countPatternRows.legacy.Rd | 18 ++++ man/extract_info.legacy.Rd | 20 ++++ man/initializeRecord.Rd | 17 +++ man/mapFAMC2parents.legacy.Rd | 21 ++++ man/mapFAMS2parents.Rd | 10 +- man/mapFAMS2parents.legacy.Rd | 18 ++++ man/parseIndividualBlock.Rd | 24 +++++ man/parseNameLine.Rd | 19 ++++ man/postProcessGedcom.Rd | 17 +-- man/postProcessGedcom.legacy.Rd | 34 ++++++ man/processEventLine.Rd | 27 +++++ man/processParents.Rd | 11 +- man/processParents.legacy.Rd | 18 ++++ man/process_tag.legacy.Rd | 35 +++++++ man/readGedcom.Rd | 1 - man/readGedcom.legacy.Rd | 78 ++++++++++++++ man/splitIndividuals.Rd | 20 ++++ ...dPedigrees_alpha.R => test-readWikiTree.R} | 0 27 files changed, 509 insertions(+), 75 deletions(-) create mode 100644 data-raw/benchged.R create mode 100644 man/applyTagMappings.Rd create mode 100644 man/collapseNames.legacy.Rd create mode 100644 man/countPatternRows.legacy.Rd create mode 100644 man/extract_info.legacy.Rd create mode 100644 man/initializeRecord.Rd create mode 100644 man/mapFAMC2parents.legacy.Rd create mode 100644 man/mapFAMS2parents.legacy.Rd create mode 100644 man/parseIndividualBlock.Rd create mode 100644 man/parseNameLine.Rd create mode 100644 man/postProcessGedcom.legacy.Rd create mode 100644 man/processEventLine.Rd create mode 100644 man/processParents.legacy.Rd create mode 100644 man/process_tag.legacy.Rd create mode 100644 man/readGedcom.legacy.Rd create mode 100644 man/splitIndividuals.Rd rename tests/testthat/{test-readPedigrees_alpha.R => test-readWikiTree.R} (100%) diff --git a/R/readGedcom.R b/R/readGedcom.R index 3cde78dc..109377e1 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -47,7 +47,7 @@ #' - `FAMC`: ID(s) of the family where the individual is a child #' - `FAMS`: ID(s) of the family where the individual is a spouse #' @export -readGedcom.alpha <- function(file_path, +readGedcom <- function(file_path, verbose = FALSE, add_parents = TRUE, remove_empty_cols = TRUE, @@ -67,7 +67,7 @@ readGedcom.alpha <- function(file_path, if (verbose) message("File is ", total_lines, " lines long") # Count pattern occurrences (pattern_rows remains used in subfunctions) - pattern_rows <- countPatternRows.alpha(data.frame(X1 = lines)) + pattern_rows <- countPatternRows(data.frame(X1 = lines)) # List of variables to initialize all_var_names <- unlist(list( @@ -85,10 +85,10 @@ readGedcom.alpha <- function(file_path, ), use.names = FALSE) # Split the file into blocks; each block corresponds to one individual. - blocks <- splitIndividuals.alpha(lines, verbose) + blocks <- splitIndividuals(lines, verbose) # Parse each individual block into a record (a named list) - records <- lapply(blocks, parseIndividualBlock.alpha, + records <- lapply(blocks, parseIndividualBlock, pattern_rows = pattern_rows, all_var_names = all_var_names, verbose = verbose) @@ -110,7 +110,7 @@ readGedcom.alpha <- function(file_path, # Run post-processing if requested. if (post_process) { if (verbose) message("Post-processing data frame") - df_temp <- postProcessGedcom.alpha( + df_temp <- postProcessGedcom( df_temp = df_temp, remove_empty_cols = remove_empty_cols, combine_cols = combine_cols, @@ -132,7 +132,7 @@ readGedcom.alpha <- function(file_path, #' @param lines A character vector of lines from the GEDCOM file. #' @param verbose Logical indicating whether to output progress messages. #' @return A list of character vectors, each representing one individual. -splitIndividuals.alpha <- function(lines, verbose = FALSE) { +splitIndividuals <- function(lines, verbose = FALSE) { indi_idx <- grep("@ INDI", lines) if (length(indi_idx) == 0) return(list()) @@ -153,7 +153,7 @@ splitIndividuals.alpha <- function(lines, verbose = FALSE) { #' #' @param all_var_names A character vector of variable names. #' @return A named list representing an empty individual record. -initializeRecord.alpha <- function(all_var_names) { +initializeRecord <- function(all_var_names) { setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) } @@ -167,8 +167,8 @@ initializeRecord.alpha <- function(all_var_names) { #' @param verbose Logical indicating whether to print progress messages. #' @return A named list representing the parsed record for the individual, or NULL if no ID is found. #' @keywords internal -parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbose = FALSE) { - record <- initializeRecord.alpha(all_var_names) +parseIndividualBlock <- function(block, pattern_rows, all_var_names, verbose = FALSE) { + record <- initializeRecord(all_var_names) n_lines <- length(block) # Loop through the block by index so that we can look ahead for event details. @@ -185,19 +185,19 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo # Special processing for full name using " NAME" tag. if (grepl(" NAME", line) && pattern_rows$num_name_rows > 0) { - record <- parseNameLine.alpha(line, record) + record <- parseNameLine(line, record) i <- i + 1 next } # Process birth and death events by consuming multiple lines. if (grepl(" BIRT", line) && pattern_rows$num_birt_rows > 0) { - record <- processEventLine.alpha("birth", block, i, record, pattern_rows) + record <- processEventLine("birth", block, i, record, pattern_rows) i <- i + 1 # Skip further processing of this line. next } if (grepl(" DEAT", line) && pattern_rows$num_deat_rows > 0) { - record <- processEventLine.alpha("death", block, i, record, pattern_rows) + record <- processEventLine("death", block, i, record, pattern_rows) i <- i + 1 next } @@ -212,7 +212,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo list(tag = "NSFX", field = "name_nsfx", mode = "replace"), list(tag = "_MARNM", field = "name_marriedsurn", mode = "replace") ) - out <- applyTagMappings.alpha(line, record, pattern_rows, name_piece_mappings) + out <- applyTagMappings(line, record, pattern_rows, name_piece_mappings) if (out$matched) { record <- out$record i <- i + 1 next } @@ -234,7 +234,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo list(tag = "SSN", field = "attribute_ssn", mode = "replace"), list(tag = "TITL", field = "attribute_title", mode = "replace") ) - out <- applyTagMappings.alpha(line, record, pattern_rows, attribute_mappings) + out <- applyTagMappings(line, record, pattern_rows, attribute_mappings) if (out$matched) { record <- out$record i <- i + 1 next } @@ -246,7 +246,7 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo list(tag = "FAMS", field = "FAMS", mode = "append", extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")) ) - out <- applyTagMappings.alpha(line, record, pattern_rows, relationship_mappings) + out <- applyTagMappings(line, record, pattern_rows, relationship_mappings) if (out$matched) { record <- out$record i <- i + 1 next } @@ -267,8 +267,8 @@ parseIndividualBlock.alpha <- function(block, pattern_rows, all_var_names, verbo #' @param line A character string containing the name line. #' @param record A named list representing the individual's record. #' @return The updated record with parsed name information. -parseNameLine.alpha <- function(line, record) { - record$name <- extract_info.alpha(line, "NAME") +parseNameLine <- function(line, record) { + record$name <- extract_info(line, "NAME") record$name_given <- stringr::str_extract(record$name, ".*(?= /)") record$name_surn <- stringr::str_extract(record$name, "(?<=/).*(?=/)") record$name <- stringr::str_squish(stringr::str_replace(record$name, "/", " ")) @@ -287,19 +287,19 @@ parseNameLine.alpha <- function(line, record) { #' @param pattern_rows A list with counts of GEDCOM tag occurrences. #' @return The updated record with parsed event information.# # For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. -processEventLine.alpha <- function(event, block, i, record, pattern_rows) { +processEventLine <- function(event, block, i, record, pattern_rows) { n_lines <- length(block) if (event == "birth") { - if (i + 1 <= n_lines) record$birth_date <- extract_info.alpha(block[i+1], "DATE") - if (i + 2 <= n_lines) record$birth_place <- extract_info.alpha(block[i+2], "PLAC") - if (i + 4 <= n_lines) record$birth_lat <- extract_info.alpha(block[i+4], "LATI") - if (i + 5 <= n_lines) record$birth_long <- extract_info.alpha(block[i+5], "LONG") + if (i + 1 <= n_lines) record$birth_date <- extract_info(block[i+1], "DATE") + if (i + 2 <= n_lines) record$birth_place <- extract_info(block[i+2], "PLAC") + if (i + 4 <= n_lines) record$birth_lat <- extract_info(block[i+4], "LATI") + if (i + 5 <= n_lines) record$birth_long <- extract_info(block[i+5], "LONG") } else if (event == "death") { - if (i + 1 <= n_lines) record$death_date <- extract_info.alpha(block[i+1], "DATE") - if (i + 2 <= n_lines) record$death_place <- extract_info.alpha(block[i+2], "PLAC") - if (i + 3 <= n_lines) record$death_caus <- extract_info.alpha(block[i+3], "CAUS") - if (i + 4 <= n_lines) record$death_lat <- extract_info.alpha(block[i+4], "LATI") - if (i + 5 <= n_lines) record$death_long <- extract_info.alpha(block[i+5], "LONG") + if (i + 1 <= n_lines) record$death_date <- extract_info(block[i+1], "DATE") + if (i + 2 <= n_lines) record$death_place <- extract_info(block[i+2], "PLAC") + if (i + 3 <= n_lines) record$death_caus <- extract_info(block[i+3], "CAUS") + if (i + 4 <= n_lines) record$death_lat <- extract_info(block[i+4], "LATI") + if (i + 5 <= n_lines) record$death_long <- extract_info(block[i+5], "LONG") } return(record) } @@ -317,10 +317,10 @@ processEventLine.alpha <- function(event, block, i, record, pattern_rows) { #' - \code{mode}: either "replace" or "append", #' - \code{extractor}: (optional) a custom extraction function. #' @return A list with the updated record (\code{record}) and a logical flag (\code{matched}). -applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) { +applyTagMappings <- function(line, record, pattern_rows, tag_mappings) { for (mapping in tag_mappings) { extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor - result <- process_tag.alpha(mapping$tag, mapping$field, pattern_rows, line, record, + result <- process_tag(mapping$tag, mapping$field, pattern_rows, line, record, extractor = extractor, mode = mapping$mode) record <- result$vars if (result$matched) { @@ -338,7 +338,7 @@ applyTagMappings.alpha <- function(line, record, pattern_rows, tag_mappings) { #' @param type A character string representing the type of information to extract. #' @return A character string with the extracted information. #' @keywords internal -extract_info.alpha <- function(line, type) { +extract_info <- function(line, type) { stringr::str_squish(stringr::str_extract(line, paste0("(?<=", type, " ).+"))) } @@ -349,7 +349,7 @@ extract_info.alpha <- function(line, type) { #' #' @param file A data frame with a column \code{X1} containing GEDCOM lines. #' @return A list with counts of specific GEDCOM tag occurrences. -countPatternRows.alpha <- function(file) { +countPatternRows <- function(file) { pattern_counts <- sapply( c( "@ INDI", " NAME", " GIVN", " NPFX", " NICK", " SURN", " NSFX", " _MARNM", @@ -407,7 +407,7 @@ countPatternRows.alpha <- function(file) { #' @param vars The current list of variables to update. #' @return A list with updated `vars` and a `matched` flag. #' @keywords internal -process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars, +process_tag <- function(tag, field_name, pattern_rows, line, vars, extractor = NULL, mode = "replace") { count_name <- paste0("num_", tolower(tag), "_rows") matched <- FALSE @@ -415,7 +415,7 @@ process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars, pattern_rows[[count_name]] > 0 && grepl(paste0(" ", tag), line)) { value <- if (is.null(extractor)) { - extract_info.alpha(line, tag) + extract_info(line, tag) } else { extractor(line) } @@ -441,7 +441,7 @@ process_tag.alpha <- function(tag, field_name, pattern_rows, line, vars, #' @param skinny Logical indicating whether to slim down the data frame. #' @param verbose Logical indicating whether to print progress messages. #' @return The post-processed data frame. -postProcessGedcom.alpha <- function(df_temp, +postProcessGedcom <- function(df_temp, remove_empty_cols = TRUE, combine_cols = TRUE, add_parents = TRUE, @@ -449,10 +449,10 @@ postProcessGedcom.alpha <- function(df_temp, verbose = FALSE) { if (add_parents) { if (verbose) message("Processing parents") - df_temp <- processParents.alpha(df_temp, datasource = "gedcom") + df_temp <- processParents(df_temp, datasource = "gedcom") } if (combine_cols) { - df_temp <- collapseNames.alpha(verbose = verbose, df_temp = df_temp) + df_temp <- collapseNames(verbose = verbose, df_temp = df_temp) } if (remove_empty_cols) { if (verbose) message("Removing empty columns") @@ -474,7 +474,7 @@ postProcessGedcom.alpha <- function(df_temp, #' @param df_temp A data frame produced by \code{readGedcom()}. #' @param datasource Character string indicating the data source ("gedcom" or "wiki"). #' @return The updated data frame with parent IDs added. -processParents.alpha <- function(df_temp, datasource) { +processParents <- function(df_temp, datasource) { if (datasource == "gedcom") { required_cols <- c("FAMC", "sex", "FAMS") } else if (datasource == "wiki") { @@ -487,11 +487,11 @@ processParents.alpha <- function(df_temp, datasource) { warning("Missing necessary columns: ", paste(missing_cols, collapse = ", ")) return(df_temp) } - family_to_parents <- mapFAMS2parents.alpha(df_temp) + family_to_parents <- mapFAMS2parents(df_temp) if (is.null(family_to_parents) || length(family_to_parents) == 0) { return(df_temp) } - df_temp <- mapFAMC2parents.alpha(df_temp, family_to_parents) + df_temp <- mapFAMC2parents(df_temp, family_to_parents) return(df_temp) } @@ -502,7 +502,7 @@ processParents.alpha <- function(df_temp, datasource) { #' #' @param df_temp A data frame produced by \code{readGedcom()}. #' @return A list mapping family IDs to parent information. -mapFAMS2parents.alpha <- function(df_temp) { +mapFAMS2parents <- function(df_temp) { if (!all(c("FAMS", "sex") %in% colnames(df_temp))) { warning("The data frame does not contain the necessary columns (FAMS, sex)") return(NULL) @@ -541,7 +541,7 @@ mapFAMS2parents.alpha <- function(df_temp) { #' @param family_to_parents A list mapping family IDs to parent IDs. #' @return A data frame with added momID and dad_ID columns. #' @keywords internal -mapFAMC2parents.alpha <- function(df_temp, family_to_parents) { +mapFAMC2parents <- function(df_temp, family_to_parents) { df_temp$momID <- NA_character_ df_temp$dadID <- NA_character_ for (i in 1:nrow(df_temp)) { @@ -569,17 +569,17 @@ mapFAMC2parents.alpha <- function(df_temp, family_to_parents) { #' @inheritParams readGedcom #' @param df_temp A data frame containing the columns to be combined. #' @return A data frame with the combined columns. -collapseNames.alpha <- function(verbose, df_temp) { +collapseNames <- function(verbose, df_temp) { if (verbose) message("Combining Duplicate Columns") if (!all(is.na(df_temp$name_given_pieces)) | !all(is.na(df_temp$name_given))) { - result <- combine_columns.alpha(df_temp$name_given, df_temp$name_given_pieces) + result <- combine_columns(df_temp$name_given, df_temp$name_given_pieces) df_temp$name_given <- result$combined if (!result$retain_col2) df_temp$name_given_pieces <- NULL } if (!all(is.na(df_temp$name_surn_pieces)) | !all(is.na(df_temp$name_surn))) { - result <- combine_columns.alpha(df_temp$name_surn, df_temp$name_surn_pieces) + result <- combine_columns(df_temp$name_surn, df_temp$name_surn_pieces) df_temp$name_surn <- result$combined if (!result$retain_col2) df_temp$name_surn_pieces <- NULL } @@ -594,7 +594,7 @@ collapseNames.alpha <- function(verbose, df_temp) { #' @return A list with the combined column and a flag indicating if the second column should be retained. #' @keywords internal # Helper function to check for conflicts and merge columns -combine_columns.alpha <- function(col1, col2) { +combine_columns <- function(col1, col2) { col1_lower <- stringr::str_to_lower(col1) col2_lower <- stringr::str_to_lower(col2) conflicts <- !is.na(col1_lower) & !is.na(col2_lower) & col1_lower != col2_lower @@ -608,9 +608,9 @@ combine_columns.alpha <- function(col1, col2) { } # --- Exported Aliases --- -#' @rdname readGedcom.alpha +#' @rdname readGedcom #' @export -readGed.alpha <- readGedcom.alpha -#' @rdname readGedcom.alpha +readGed <- readGedcom +#' @rdname readGedcom #' @export -readgedcom.alpha <- readGedcom.alpha +readgedcom <- readGedcom diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R index b8697908..e7e04a82 100644 --- a/R/readGedcomlegacy.R +++ b/R/readGedcomlegacy.R @@ -47,7 +47,7 @@ #' - `attribute_title`: Title of the individual #' - `FAMC`: ID(s) of the family where the individual is a child #' - `FAMS`: ID(s) of the family where the individual is a spouse -#' @internal +#' @keywords internal readGedcom.legacy <- function(file_path, verbose = FALSE, add_parents = TRUE, diff --git a/data-raw/benchged.R b/data-raw/benchged.R new file mode 100644 index 00000000..c0d0eef9 --- /dev/null +++ b/data-raw/benchged.R @@ -0,0 +1,31 @@ +library(microbenchmark) +library(Matrix) +# library(BGmisc) +# data("hazard") +library(tidyverse) + + + +# Run benchmarking for "loop" and "indexed" methods in ped2com() +benchmark_results <- microbenchmark( + reg = { + readGedcom("data-raw/royal92.ged") + }, + alpha = { + readGedcom.alpha("data-raw/royal92.ged") + }, + times = 5 # Run each method 100 times +) + +summary(benchmark_results) + +lm(benchmark_results$time ~ benchmark_results$expr) %>% + summary() +# Print benchmark results +print(benchmark_results) + +# Optional: Save results to CSV for later analysis +write.csv(summary(benchmark_results), + "benchmark_results.csv", + row.names = FALSE +) diff --git a/data/royal92.rda b/data/royal92.rda index 4a10b174c582e692825f6afe8e14ff1b069d9978..678261bd01b418c34dea9b0a8f61ed568ac576f1 100644 GIT binary patch literal 67612 zcmV(tKvQ&2UKVgRpfkl* zs)A-(1?mtq`wK1=P|2Vhnvh!g`SsYX4|fbsicZp6kJg_Fs9KxM*|3n!St4?ABQ zGOXX2>_XHrw%M=CH`Ek{MrR(DqDw%&()~Q3JW3eYe?D=Xr2$bJxqRaiBfPdsiJf%>0sk++1HQ@nuK%m-~Bwj%ohQqqA9!X8kacjQf z&RWSBfEO57e;WR6jpP96L*C&LMF3ME#u%vAz2#(l<(333V7N-)lH`y?t(J%9@yo4z z4NQFcs)K+pPp)3Q7t(y+p6a)Ht8DiMN@B!THRwQyD9vK}KnE4avDkZWabxU#8<`uI zgm$$x1Fzgq2@nu4ueSR6j$y%QUB&mQp{NRS*iRP!1ex@$m+vFHTRb$oH1N~~_(W9B1e~St5sZtJ zGSFM`2f0Mu4{wuJIr@qZ-EZDxfYmML!lJoTXw3`>ISJMma2n0~=`p#8mM)xUPby(- zao1Y%N44IvPZZL(A#7Th$!>%iVsD$=hWjT(8RM?f?*aouu;5oJX{5W24-ky+tLQx- z9ZPKNUjRx+xLAUrbI4NYCWl$mMji6dkGI8RCp@8|^FmZJg3egn|D-O62BMXW>+v$C zk1xnnnLwqX7SpQ_jQ|>12npb9?-YFfI zRK17DV%gE!{S4m8Gh&tpJDHj{fV@$`uycOSX;{9Y>x%uL2}^n^%82`4X-JvNrHaB! zSAa`+MqUi#mxpCyC5|wrpLR{)cp(c_2d{bayeKh}O*;j4{&CeRpKuE}C~rmA1e%~E z5(6uJwQ>AiOyhY7>VHYNLi!N{j#v^Qe5->QV39+~FbSdzdV+GT#-ljR-35yw->FbA zg<+b-{9<26Ybh{EbU0_0DEluFMhR7z9&%-=SFc?t`4c41Nf0+7mVaLjfX^_$Ic3S2 z8hGtilTF3*bf-r{v~N^L)j0zub74)*K=B0gk`QssSijzb< zo*?lGIHj?Sm0{kqW&VzB&Rf6Z7Y;{9dy06pCh=MtII>PXtk-evj1?y}Gg#cEe~6o# z!LLMG{IU1*OIzOHUelEZ3wWKTPe&?>$Q~%Ba{ca0AI5ej0c}1R*7{ZdCivy@th%aP55Yez`qWfB;=Sn@0^dMB z>^((wik=eQRxXu(&LA_6gwcOm>29Qks>U*oEK^`)@Z6HlORJ8Mfq~Za_-ub<5h_Uq z`k$HHUw2UCPSwxKb>Os|x$c_98OOBjDHxS&qM|14wUDaU3Y9_Z_^ijF3S!B>?ZqO%j6w z!ow8&Jf*K`wKq@0K#9Ntz90N>_&voBd#-ROes2Hjf2sFf7{=AGhy{MCkDQ zUZ;Jr%AV-ZB>*lsXzigabF!t;p+#!@^5*IQ%IF}=k2#uwbswxzGXNo!q_`Lp==;{) z2Py%!Hsov#xsd{?7Xruq{wY0m-_IaGpQxszfEHiboO z;146%Fo|)lt%fzuk=J+*_D34*@)ea}_rPF<`+EYW!ZsJL!vX7GSC6dMH}}?;&3_EE zI3K0G*n5nt&tCs-f-uVP7=E&=aq6S54WG2@w$!9jp+7-)gWIq>5^F>pmmilX-P!{#8zzj`#E!2!tAIbVN;w9V+) zhMGfIuO(%?2#>u8l~>SPC>vwGz$*Rq_VVNTF~sroo6w|!)xJm~9(YEixJ-QF^$7w# z%8FjS?hvy4o%c*-H0y8jLu{+5U+Y&(GlBkbecR5wJ`qWt7e{*ez~T%ae{>wf7&x|Z zPSQa}w?`2_?)wBs%B~4?&{{}%4-Jcm!nCem>B-==#Y5u@=XCKg)$JGk@WjvV)?(Gf z49yoDMI%Ey+^wU05qaIiJ6*oG2EkUOK0xFEH)SBhh3EwWTys-V3Nnr&Y(V%CRk2PO zVl%jmP%#eMlyPf9ojE+SAYq=fMB@l1**bkExUp&Tq_n%xQ>O!ajTtT^h(bsoOc@5!g;oDFe@LeIz@V9N?xT$BN(!B4Fr zJg%1;4+;zOPYiT zPi|q|r1nr<5Dw=1&y@(dJ)BrznkuX|3LxrkO%?w2G$e8|S9Rt86}{>xMg9wrse3YuD;a0jryH9i-i=@-v4S z8P__;t-62_*oDMhIHVIi!hxMRH_&tToXiIAFz+U7e7MWDTpN z;<(Jn*#pS=7S+f=BW_Pgc*RhGUYU)6i;G`Gs`NShkREM?l8Egq@0CXbD$3Eh82`;& z!(o#5^2B8x`A4(Ar5zd&bM2_f<&py_0YMW@WNIr4%z%L*Q+8@x)xOW^F4`!GGPbX} zhXHDEPL@=wWVKYG_mq*!&J+TPXG;1VD921)Xugb}q$qQ%u-cN|=HBRz*-fCKMJ*R=O%n&OY;WGb1wI99W&rvFLT{4c3dgipPxTbVw zC&fV0UpOyD@AQIx4gcix+I5WWoZ>4B3!ePgzVx%Q%5K@LxdoM;WB8t~k9cobzwTY| zP}e=t2-BKZFGdJ8kZs=a(T>|v&O}NFEXvG<{0>Q6AI~Q_VKhJg-uugG3&_!f>Dx#{%0I5qrQlqR7{wo*J$sfO=*ywylsT23_eX}9hxY);9!##Wqw_yl~sqgv#hUV#zaigQK ztE^oMpL&R2e(x|ZMDVf+jxjV*2=|)+H1OU9bFJ*gBBulIKX^3$g1eJBMQnI>HQVNg z0{oA*Nb!|3Ne>`vN$H-S`s!TX@8gq#8SomxvkvA*CBORQ=)j7u{V|8ICU(tdv4E@! z+gD?a`#tcpd=35zl!1gvFX^7Q_~0*)U8a?>=5&nE`liY%J6mOp&jjN&a9IU8qvU#$ zZlCLpc$^vMfGeW)%7;L4TCqtQDrh>5onrlwp)?c!+OvCFHOL*35FVg9*kF1FtY{{x zVw0;`MCjNo!G$hUK3Wk>F-w)Ng@cJov~4oMOgg?yXIb)<-*4-a&K%4%HEQ!7QaE^-H$Cy0%U?v3php7zG7IC8U z)Ha0bB%>|!1Nw0c>q!e5g5L*6!7V#^Fckx3uIN&fLM1;^yj-?Ni)PzXA5B2Rp}adq z&|#mcS@ol=5Q{W5ANB_0I${QOdOgx~MVG%*AQ6E12)Gn32or#nVtzwgFfsnh6}<2F zSU{JuooRT&oa-q`e2_-)+j|oK0@;_Wo8};+1F;)MpDDSUCM*T@F0qrE8*RC->U4h% zari(o+-Ob8(Hl$GS*Fv64-c`TNWn!m5$3_$z=fK&dz<@>$EQ?Tk}D~dts9+GycdBF z67~b+*0}k7z2R(kStQc0OrZLTy;ResStq5w@kzgRAxAr(HZ?+J)!6AcT2`KHmP{Oo zbfGXrq^%nBohqv(w0@Q$*Uye&6UqmKiPcYYm+XaUy~zEoQUvcC#&}%FhFRw=DUI}H z49)zLV*d8}@=ZZYsU(nq;v zm71Q5_TO@l0qwVZc;LgfsbJ`L9!TT+$VtVHx4mkiCo#IMImYlA7BREorP%h){S|T@ zgsR%$Dh6W#SLXCUXE9k%KN#Z`!gXAvuFY6sE z9BH!b3eonY4FmIfMOckmE=|9%bdA@)7!DQKEl%t5RH@9z)V*2ZO+#N?TL7hT&{6fA zbeg;k;-*Xi4PU%b6SJT^A}mHa%DJog6~73U-_8KvfkL@z7_#ie-YQnhw}5yytyV0c z+kjk{@-SZk{cPBG)mGM{VX4!ht7{ny;$Jgh7LNCkF6gq+ibi0CfKL`Y&nS8pad5)w zuXil0{YW#}{RNbS9OQ`vE%j{0SF5Xppv`{E0sCt*jA?xV!7w_)yrNVn$~x>P<$c=E zJwNJDC!-{nGaH9R#T*zX<&fRn7)eeSlitdZV*5=BF^zUx6i09Ne5cbHC+9A!*t%VZ zH1F-r!7#pD_w>skFyrD4`CLT*o!?^B9ZCx-tF8date7W6EiSOc%*yX$FN-(ObJVFD zYYo5~8BNL8=(ngFALTrU&6t>&yCLU5hTaTv@@iNrx_|g7ggQ3E=#EkUz)Q?`9`u7&geL&PgWFdqCEWI-SLo&dzd+L>fApN2CJXwd~*6WYi)qaVRU z4ZyvFv6VEs%OYE%It$ac+Y?YSiH{M)s=)^?J7L{@*g0Wmx$3EBV`O!}f+=t_w@R^_ z>bBMW+~9n8?~fkst{OCq=nl3hd&V!{7QYa7whD*_CEuM3VS#euzL~=9#6rFH6sy3q|F5THHl8Bgu)Ks4@5uza+4Pa-5wR z&A>Pi<4`tmP8f0~bCNE4s$t3|8u-X8XNNf-NHw1MD%lKShw%xkL!FXJ!@ZAy@H*>3 zHi%J1R=Xc}maC-EEflISU}x!^f^IXmcwwHkuO%J8RwQ#K&(V`(AvljIR-o^gU&?tu zZBvumb3xCaYT->#J34{5kylLCy(Q1;PQz*TUzR$*HE}GoKO|tn>Xjb!6R1yCt1r}) zs|z;r|7Bw0^~Udka2E>d9zdBYr!qCf^$_OF3SfvE$*hnaG_98vo*_i)@0nHENgn5- zPCjmnXK`|2bxDt>HT5Bb#BkzhN{GBOa`6mJJKBe>h(ajVJrkG11`Eva1k)%Nvbn-< zG)hD1!H=VdcM1MrCrptXu>=4-DeC>~MVBz7WnZW4vEfAUqn3hcZ+qh_8GSj)@o;d> z42HsyOh{W5P7k(PLAXetK`RX1*psnw`TN%|mMDSuX@a?!gz zf3dgMb^=AL`TR)4pScd4{V21@Q?)IVl*-eF3vO}~Zbiz{zBGJx3$f4i+HbWI`*E{b zRY>G7ROWxeyqjS8on<}KR#iz|2alKRPhkscRI<){2fotsd|9{SQ(51pyP$INNJXX8 z<8iC^HF-&gF+Y%R85#Rlp4#icpYzZ(Bhux*6>C>O(_r8pEQ26fPg3t|HOfD}MwWf_ zSE&Oyc)NlSA1Q^~%IQcdF=$HBX^teo-@$Db93avsFmR9^S|F6%GxE#Y3^c4^Jy|YNQFkDF0 zDY->^;587m=x#3ywJ-N!b(eSO=qo4@=OewT`Y04QwNC(AUg^KA`=5_y-D5FhHOnm3 zVBnoJq{zf%>A6v^#-dOoBjcY@eo|Yg;AD-68xLordB<1A8#0R);X3(ndq6y6$I7;-8TyS!%c&kpZ<6@7Ijy%^25;uT=S`+&G;^lNy@T)C;%MjI7 zr@yUfUZx$Sr_Z=wg76>CO%vaf&o;9M_h23biYifqx8}CziY|$xR>NA?u5bhg8n|Nk zL&b28T1-wD7z zML5{xiz8-;44YZnX^jQ{kB(8))&ou!K*4DqW|0Dwd-gT=EnRtVZ|w%9%j3xd4`ML+ zJiJM@rWY?Op&uC9W@99%=p*FT5yD{xNsH^C|IAy?OK(%v$|sa?ure!M1u1JZ)hIyE z8i~|$i!KSf^FMoz5kD<#3kxd^e<`~XF~J~5-;(kfazYv);6_=UEKXdNV1Y7}L>Glb z`0w7#)$Op6-UaJvT~M+M8{0xevNB*3x2yhhTV_70h5tu~2>1sg840^v=J#aK7gz4E zNxl`5?#CkRe@~`c6G8)WpKdVNIxbcYwB|p>2B1yX#be^pp*zyzmQ2DY^{8xKEorbn z>N~Q@q4oQ4)WFgQ6$T;0t3+NV2Yd?7WECMWT#4V~Em8?oTp3A7409f6$< zQt%6yWT^`B+PU$W5&A7{Pu(|!VshtA~7m|6Vs&D*bhG|X5N5k*YFNpK3WihZ%i;JazOz@-e=JXu-i+} zL>`;084GJC*L6aOn0-3|e&FVXmu_Z2(o=H5Ni%1AR^Xy<>>zo^rs#f|QBlKW{sDn! zIL;7T=Sp)cQ)xzSy?h)08f++#CR827tmg1kd~2llOMS_>hWpb{Kl5d{GMFt0wUTqQ z^Jy!tmUtj-oq-d^zm+vEwut#<^%YdLsf%79P~AJQX*%;HA7*GH%zmErV{aO69rj&d zE||$WXf~R=_lT)9<+19b9|JilrqI%aQ2$qwm2`ZKK`r+zZ;lHTXVOD zx{||aJPF@7J~t}#V%=lEqpDDWf(L-|bc4P1srUsa6H$8;-#hUGLD?wsI#a7n)~3kv zjPXyrkCQL`!}Fe>k45gkc&gX?Xdj(}=go;Bm~N#mtGX+Jy7&ISpx*>QuT%C0i}zpZ z!mzK~WN1NlE-LHvlK))J?_=rG=2keLY}+SGHt!YB_qr>jj~#?kgXGNmz8s>38j<7C zl(hh!vB)B*{787P1B#E%;U_t|D5x=L!ha6M(UIFy0+-sXz4tCoY!fB|HA{RF{Ao*v z|HXLjbjut&v_}&!W&Yh9v)+g-7^UJCnjqYtW>w+usUC{00Qma(g#rjqR}XpKHcPaU8AHTa{HlK4 zdAYucqiB2*U%ia59jdE$ByC*6uqv~W6vS|8252)}=hG_3xhV4beI;W=>TS={^ieu>o^(xZxt<4I(z_BcIm%vZcSa zfC9!by@|+~s>7NUa@`qu@gJPyK%M*_J;^JhVVL5Mkb&R}qE0}sPj-ZXZW3Xpf#atr z{$UmtG}f$DpOn>WR#AXGcBZdb2YaHp6Nfg~M!-d3(2;x>p&`EPZEn6Cxw*sMi~35e zIY-=gdtIQ-ZcI&*EY2`y1qd?yhO6Db&aquH>Yp|zFA0rnLw&YzH6Z~0@ni=NcI8(w z!0Fz5G>=^|Vq%wpuMjH83>M=JO6;U57ZI@JMv@WL*C$*E#WS@Zfg=EvW}a6)h=+%o zrtbb2ACm&?G%-=hnfi>vN7v<&WIpbXDb0KpSaQFYv198=0LA~tsQyFDCqNv_`UFQN zF7RXz^U4k$_UcIVhx;?SBG<@IfbWXCUX{k@$ze#<YpD%N^4Ea@P&=$YY zMhohC>dm;awlB2b8qoXpSEVHG4d8yj9C?xR^QTN_nct*R`NZY=6JW|jBGlZCGFv2Au zVI_34?PL?-!hG?)=OwHV)DwT~1p}g40rkk{+`6P+DV8ZzDo^JM8`gP=WjZLPf{Ugt z7jim88eIR5`~Ji`?6PV<70n2loLGbGo0g5jgP0>ydo}YhfuMc)2 z?PRfZ%uMF21&`Y)F%ykJb{ad-hNu$<5_qpsA9Me*8?PsUe)HO5w)o+?t!+_@jVtzI zNjjqiBjz01(-}SYXjLRGvn`&RgnYxH+%&J!{U__zu?*x7gBp*gZ#0!pVD&9&^x$4u z%und;@=6v@T~XEBVQH%hsR4$#e)%HmzrJee z3st7PNy%Rc{YkOOGcA3^wkL;u!AY-atCwNANBOg1p^Y=iLZbQAeWfc22R)c0=@?F@ zTklgJ)E~MPmg>a`DWL|8R3!%nGOcNoBJN6JIL{IUeIPc1-}#W|BQwfvPReLZF>|n= zv4E^RmB_mSSvux2@x)-4pN4LfSE97bpU14=fn$4pfWA24;b6z}ndqNoXOPbAaj6`U zybH1FI3)g~kH^2I(dXme>?)(ke8VFCf;NK27M#XW9j3(oDW#u-3BJ4q_Qmm!4OTVEWjW zdS)&8>hwjQwRM@EWI&w4fRwLCah};d9ryXP9n~nh7 zu@YvvqWsz4OnS?}%1gBMbNo;eAv>gL?rGl=exQoH=XtUY4qt->DxF|UVeUXt`zYQC z^HtnBvPYePt_T&8n<%N3#^A*#0I~tVkfR_d8^0_exlNkS9SA0hngl-M53*r{h!i|i z?BV!&+>^x!?mTUiiT|F->Jt&!N!5nZT^9nd`?Z8YArwI`C^70|sQQDbtiPjlwyy&1 z(|~qThBndyXKZSN-q01E#&Wyg*aG(P^~M{>68}*)5m|Mkau!G7MKwBl=t?Xg;l08^ zGluY}2U*7y#Ewr*SW9@t{~w`LlKK+z_8!Q#RS*-sStFZ^DI76Dh=8Iy$Lgx8&=ukg zq*VZ9krIXi=#i%dQ-bFp?&ODBIQE7Rg*SL8p6X-*`ot007f}?d!mStx)zEKWPX~lXj@d#LIJ|60!uMn>;4a zHM4g|z9IT1`Q$1GF%K@2q%J!Z$@!=h@dqrXdfn*hzMm+~ujisIrJC-V;bmN7yRrCo z|Ng&-IU0E<84v^;Gs36m+F=!&;${E{lZCc(iR)1DEy{_p)lIx#t1p0k(su+U14Ep1 zO<(tK-xWe4TO7MTtOYa30&w_86ug@|i)|3$9-iVa~ct{O@qzf2H; z@YxmRl~wIXuy5deg3TI(Tq0MV*n(j+99*a@;5Fq!kO(U7#qW|RXVtXfdxuIPcVoQcw5YDSZW7g(Q^70L~Iw5RHa$qXmXP{?{a$S zC=cj;*gDR@B&rpn0SfGv0;~#qvLTj9)U00>FoHeMKtDBRBy0|GgfZ5Es9EVp&i*7M zRGf38DT2^h%07x$yOk#Y0F0X(uQ7#v}{+V@^M}$;cSRw~E#+!p6-a*kL>F6(FSjtD25IU6Fn}L!5M5 zieB`luAxs7p6f%}e6F%heDV@beB`_-CKB_Qy6kfuQb7*DeBt1B4>)IChGpk)x4p?q zrbpZvS+79A62Ep3>JzKqrfy;;EuM#A^_W-k*PuoS4kjIxj0YxzHWMY3h+`*2%Asf>e0RLv$>o2k1th*j<40%e>h8yr9htn3Y#{`l^A^JXP z`}%AG2BS^&nd@xe0MG0l&Hv14oM)Lwh0hE$8yq>DQ^%%K2HSu=@xMWa*4b>r1Gxrd ziyZ~K0O3#FOv2K z(&KZ-IY2+QVTuCjFyGeBu*TZ72m;wet-j<^v(6`6<`gBgLaBCe=E@bM^#wcn^joJB zD`DZqueJ#^Y`UW`0sM*F=oKJ1=5>WSQ$OJU=QvbF$VUz~ZS9!#t!QRwHbbK;aI&!3 z%24DsTkF)$NJiY3h^YF*8%RJcaOK!`)YW--w!L8vhKGSn{@6$bA}n%-xam)&bB2jKmG$cW47XCu zJ@ym>F8x0HC*WM92(uZ$pnBjS*?~H7dNNLGJhl))pAqhdMloikDzLlO#X=hotQS{j zFnxNVBzzxS5m(L=FBQIvAy-P9{y{(#N_STWeG#&E_5hqa1p)Wv$<#^k$bR~%&a{AT zAJ->Y(Z2{jPi=8MfwS!=n|rsG_MK!of1O4{IJu$lizQgCvN=xQKn7&MVx=Jm=Eq8x z3Pq-Bp$u0K+2sh-sVDETOXLx4ROiT*&dfB{`a5_2gn9GF=MxYKIU$y{T!LUL4AQaG zoyyjk=n51v7hPm2MrSxyOhJ2xw+t#|Ft8bj>?EM-7dRdHs)H!2iD?*4HHNdE!tkbi z^!Uc5O-+07-;sDMa$gi5^iUQi>IfGMzEoH2RW z$NkmXFqZBcUarYYOe^M@VJiJ3H-IWrHwds+u(%zud8JHB4hBPLY!yTR6^_|6=p?&~ z`G(e*1Lv`JEruLJVxt^u6o}fqMbupLew;}K#5&e}_4bDDmA*+)kMK)07*#fS`?iy^ zVRq#DOTl5=hoX|!8!mKPK|3IS0UHyXkL^AyuE*kfh7)wc4iv0aWAa&5fQn(6b_=JL z#dYxUxM8H;AiwZ-DmHUKU&CP*x&>VO1eD7C63yieR{V1jNv4JDD_h6*ZT?u=p!47Q zja-jja(yv=wI&<+f8C{ON71`-3_x=x_7t}E^|)rX3JNVrj&pli-{}3$XQSl(a9sFc z+Ow}*FgjFp=?CZ5`_A*+y@7JOp2@sk7nLR<-^jkT)m#9W&zzv%varm@04$0IIdxg59 zId^9Sf?iue73($_1^k7{4F-CEVT!T^Ozx`gr`W6xQv=rFbe@lZ)h{ZkueSnb#lE$=FxYO z+g)mYuhXY(li8(8@T*B$Gz`eIlg1+2DIvv1f!%ph%UXZNlf-@YXmuV0-}%|;!WjfB zdz%+vdfiMg?))hU?}SS*yx9nBEw-H5B|Aokxbx;N2iSYn!O6Li&94kR`GoeGG}AeJ zqofka<|x7d%|kjmxlp{If3(QPvQjpOS3NizXZ;4P^U9)CaACPQ_jYLFTpa{fs zk>wFt#%C-b=c9>CCb0HOiBE}4O3nmZMZvT{H;YezV}!BLuH0aap(tYl3ns#~BQj;Z zZwrOL>U42~A|oQ55H<}?rQ0g4wliBV&IN>$ZU%N2hQnj>LT`9N$EDCnwR&gY!HgzB zKxAW&*Cxjvt3HumTLci9H8N+YaOaAL%BDIa0PNvW1Tbil1)?=#_7fgP77*tHmwr^EzsbWLwjUrg z1x=Uvs{UWGd^;`!WOimY30^9gfsNL5EiLlv|HUKpMNQ_x4CE$fSgvbb z-XI8CJ$KAjcSPUJr_1Ro6Fi;kX6ygp3(G4B(8&=sDe&3NUm({mso7A-*xEwy-Vb{3 zIk1U{Emozf|G9rgDl68lJ4N?TQ#YTZbjKx12&clCIQ*0d3oFB^?9YJXY985t=~r6& z%^Gp-iCWLXN1*i54S`+CQTl#spS3HI;U*BE1PT(QT&DyQzs=tqjrI2D?&Z1X<-#YC zmOi{SL{$_r18IhDc(|~9pgh0g?Nvo2V-Tr%i{}H$sV{ZYXylQMc3 z^cH2eo6T_gK6C6Il}j~+J=MR*9M>CV99LofXi$bwLHE!yfq4$h1sMbF7h`vX^4kdbo$<* zlrTC2rxg_SAR%_~(~d<{6i@*kMR&3J$k{S^9(%(IV1jdbjx*7VqaI*-8tkvqBm*4I zI-l|HWCnHHcULk&dmRUeO*{T1(z5MKGz>a|FvV9AM9c`}nDPg5*9?sreTqwPy=)kL z$bFY*B!uId@b9nP_4C4;7Z7;euo$YezP?L0kjBBwizCDRB3DdRL`ZcB+(kmh*)}TDP}`Et|a<2qE7vF zAwfdoXv?x#+7{G7L*YIV%+x=9VIqa16W|d*9Npo#CUNLNoze1;l8Gme#lpKb zEf}BGp54BjMA4JLx%{B1^1oLUB}8UaCN=nHr7x{c1hco0#e@<^BY$FaW&km%OlG$T z3+-WfF1{v*B~Ufhe3X9}8)>0-JY}U1-NBeM!;j2&dGR+_w1k2$y+MvNu;%8ryj4tb zztQH3*yXap5wifovUc%Q;JUZ6NK}quhj<=S!mGsVX!MKLjX2)DPo? zT3xV&r%&iqwo5fENljqF6o!Enwb|Uk^!(|MI6q>Eieh)-YE@=A>|Ldd zI22cwUAUK>NYc#n^pdXW9A=@#2x@*BJXGX;G; z@s!PHbqzIiT07Ck>b2mBoH6MWmtq7NAKG2Hn`^fg1t{N)p)Ax&4<&xO0SzoFA_zJnKv0q1-+n5W-NXIbn;kt~rB zkbHjQDhKeNQlqP=>=foD7#cw2TFZ2&cT07LuT_(p6?CTBbeDsfb2vnJf}og1r(a&C zM^$q=15?-@mZd748<01;`HK~V4UpZK`gK2{Uw&Wtzt;IfEWLxOc`#|VL5M~U7nTW@ zOXC@Ob>?C+b(c~@BSIheAxfwXm)n|c{A8fPm0E3}DpOqeBakAD@+-;Tx*7v=ElGTW z+=K?sM!S_DPM>e4K@N7HN+$9Nu5seJ@UiC;tnF#$L?y?PF9xvzo3Oh3$$mjH%Bb&e zFcONfCVQ|B8##0p(yUMZn(%G9ca>fBKmI(7{6t~ok)jvt!RV^9j3FeWB~9kzY`;=Q z?%0Nz=W|o-7YQ68*(Nybt)&1$!n{ww3kQv!snI>XPl|bp&aT9k70Pj6>%#=OXCiC) z&CDiYctOwT=z4_UoKTICSN&g|0*PU1;xjzi?Iix zwH4EnkYSZ&=a5k`O$G)9cFg)pBpy*AKDK>Zs{lA7oDN0oEby zg+)?UBxX`yP3|BjzqINb{1S2n{^Tb&=i3Pb2G%FBcsu-Hdo*F1TA1_hKPtw%25d124t;1yE#S1@aQeK1?P_}r&0Qlsc6qMTOgL^N?cxj-#j{$6ooJX>(7 zVIet7WJp5-#^$pS3HKiuv)6N90-S~Xxyy<}I@JlrdW&N%ED(h8aXO@f>;qDnztGPq zzsd8Nt}>&K=^opmyc6Pq7nDeD_9A1-QhSSq?X6~ICV+;}U(r?_D+vuXxL3TmjojDD zf|AW(aUv3Z7|a;6s^1nHKdK(#f^mxCxw%zaQrVSmB`~#imaPB3J}}us*NwvkW*-l| z1C5U(_80<9<$DBn$(16rsw4vef?6>GwGr&N1<721^xz9-n9Ukgcm4u@5bY-X=hU3gOhK+!DJqu?9L%z_bh}d zyu`UYl{ctC7xOrF#&79Oj2Al%Z?+X(r!MG+iAg;s2xATQFb+nD;N~b50d&GIUhs5I zd)sv?aWCkJ-*0K^0L5B1WoaL+PveaX6HcF+)@b+uJVd3+%5;^`M~G_8!=C?s`?<3e zC_`pk&RiqGRoA@ltw#xwm6+8r1qgWQ*gd*7d%;yz^C|RNFuSU|W2>~pV+`--#Bq_) z0AIAO_MlE0GwEO~0rgH)#=`@vexNuRYcR^@w<}KaqKb9`P2B=jPr1tb+fQx=xjZLFooV|6|Vm7 zjKc9EAe31tZ)meYKujw*+68&s(@aMYgN^Cp8|5G@v z(V1FCDs)m6mQi~TRJGvDmwCHF5sn9n^a&ZfG%T?GrT=vB?g8tSQ`EN27P>aa(`cB- z-7jPJf$cgna-kEjJew^$=T)dxK4tLNCYwG19=q^Uw?7vDdu(AD(|sx!CzxLMwPV!= z@M#-}$~Cnc)3z7=e-7QBaE|o?$N0J0RHq3K#P5270o5zNBh%@WWr1sM!5z@S+n+a+ zqUjmXl+*#5>yPRW@BDsCq8S%T3&XeY%x%#Vyp88v;u*zt93>bN=8@PywAtqEk7@lF z0jF|~$<7CPQA<%qFSRL!=mXEe4&JB~2vNY3#k=L-vzaZLY}%ZMfzr4yc6Ig;8`9q$ zwDP}p`a0D*2u!cN#wGV*hx0tL8%>)Tik}U@KjoL*_Bb(|=iuu4*YZjRrWMB9Hbr=X zvdvs4Jp3aQfz2QcC&~No8Sh`S8&A=|ij0R{P!;SXQ2zs9dA>giY%n(@ZFm&OCOxx( zFPb=qn;Q=-dCh?#kiXN0)-=ob?SErWEErV#OEJw}lLO2E49;UaAl-}nAR_0JLS0=; z)-Gx+LH=;^#**Auz<5ooOBnP)CzCnT^NdpQo14Nfk50K!NPH9a+`Nsi7bM?2pW7d; zc{x)g>oj{zByA6XQ^P8pVRB{BMRw=8fq{RHEq57XrD85~dj#AmgdZx?zK?7m#Wx_B zY-u*uSc%lnhZwvBB8`aoA^aORF_<^+?N^7F<|{!BeVheTA92ItsjMJoq^KlJw+oXP z{E_%sQ6@qog7c%5Q^ye7?r>c`wosXNt4wz9e;j5f;jSl4LL(cf!VM@r(2&Y<<#Lt! z*3fe+NY;3_O$YaG4={NFNH7IJXx_a}?d?oa1)IyidmGj>7UHtEFO4$|_%+8dWnP>| z1iuLP7s>p@T1GzXt;*O}`7P_%Vfxz17v!wHnSH`8YljBFnRWkL522uN)*p=mlpFgv zTVB)FBex5uYOzyACNR86IjWlTo=WvsIKqIY67b2dF;87IIP-l*Sw1i^+yLO>9tqzg z@ZR44LW@W4NBh`5|17dlJWDm8((Wy|cwP6rjzy?w?DkQZ)Z+Szc|l_pn+S}N{b;!F z+dLm1-p3Kkc+Cpb0E#oDW;1;vfr_efBwAuBmnKRpQrH^1L_WwfE*%A!G$GUO-Y3)p zc42OJj2}q?@ryIBs6861`_s5%r_9u^ptS`qkPy%Q5<}l~v=papX5o=3s|R>q%)F4s zY*R(}I2lR~4|*(>DE1{u(Tht4QS!F+L3QAKOmXCoeD%u{W^jf4z22Y_a@2* zS!%(SP6a%GEN1`cvXWvTEHV~N-ss3ufiKYsldaq=7I*K-W!Pg0Y#w_}T@=jy@L5?} zpV=+HV~}UgWxgxDU+Vkb6zdti?&RbZc}M3vCo?}buR-$mNK#4F=5l|+)~X1d0EsNf-xSEr zs@m+%w#j|YE8pmmQdDTa%M55jW?1C|(&9f^*J>?`^}2tILo?v}ZLMCN^1WD@*b2V* zt5&1Ll~~8RO8dN4+RuIMd}uH@suW;X6gsJo*4{KGbVAOLC;O?-4sFx6F3HH)72tU_ z&sgRFe3D`{T*NrBpmUH(F@>iG3Orx|B+C3@YisNgX$YIFGoF>SnmAe#k5D^53G)92 zawk3Oi?*WnJ#>lvt~(8MG0?}>I*WCO+RTv(FORs`tjm@@)<(j(? z*>&hJ2g1l4N)7*x7k6voxxB6BYgn!=mdK4i+MsRqpz#u*o;$`<7lCRzzaJTgU zRd#NO7za;uX66WYv2*58c+FfM((c}7&euzMu@EUBnM$5zOnCVGHEKu?9rJ{q#<@;$ z&=h@Piw^HF7O{4Br3wbs6Qo^nDDiM+!4!}~Q#1!=k4_7-`%DXjHLecFpEBeQ)-1R9 zF)zqN1ihoinD;zqN<7=jW6|t$g`cf=z ze~D)o6bFJh3LzW_W(W^tXDOvi6kS#@G~Nl_7Al$*7a)82LHpa!jR zwB-O+D6Q&;P9rTyPJ?#ag;-Wgh?&^_c;N{pzYMy@7dk34*-)Q!lkFia zxp`tTh$6=r@f*{T3j5LlTSZX|Ox>8gTnbq1nan)j%dsdZ5#+i*8gbhzPXtWB_FQ@1 zE#(6#m>Ape5Z8vzjWoG?9FO}Pt`(dc~blER`Fjaf^)2EqpKONmhb(7Sa6b$S#||K zVKv!vnpp2ucA^etyW3k1$Y=m3l)RRqULOR9!l^f?WGx-Mh2F^Mb1c~EP|<<7T`aHx zVf+qb=d{7xHAwwDA)I>4$>ElAy$_Zme+{XSC0MrNkZO_VElTT`C~{Xk<|8ci)d}Vy z{F^{E(qw_tGmvqGsFi$G*dR=jL~GCwdsiYlQrk_mIdO)y5qRurmk(VCz|FxJm;luU@ z{IYscoaAEiNuLOvdUK90kS< zKD5j2)o3duGt!w{_frtw-bssObqn$Hzl$5j{w7rflhV;InA0wlUmE z&!0_hgzvUAjCEedl~Eeb|O+4GXu}%Nw!;UvVS%Q;mv%P zvq{l+A98#)`)M?Ch!p}WGS?P@?Vdk+dH}$vglt!k_W79-ShI0NvaP1G6@C6<4a=(OLy+E((q%d6h$%{~55X;G`Q z^~~0^JSs67h1F_HWVejp^oXXW?^I{_bEc7eFOCT$3oB0$YEEVN7P9`OB#k~Eg+_-) z=qI!#S=L20Ema=j3um?3KseYp{QBT`>*A1D-iF|Kz^ME{?$~_HyFPR=9JR-1-u;8; zT`ItUB#h{_xVYQpwvZdKO-*0ezDl%hPm|EBN^vsA91gf|kzyV(BW(Jeotz-Dv4N$3 z7ml*8Pja|n?_{mHZmtT9K=V&9nd84c)(waBb6 zOO3BeOOC(2U2NU0QH^%`oTXR6D&a#uzoty#$yZHSv*;tmS}Y^xHfpHFz*cEp>&4wR z1Ui|9qnQRBZ^r7joE$SDO0&0ZB?yuvg>lRUZ;rYR{uVZpfEGKkr=_NRMEIFNtMqu4 z#oAyOt*(M=ucd165sds&XQ4hfhNSP4s2x|Lt~x86POe1=t~sW;fY|pfQRAk}^KHJh zQ(Q|m<$J8J0V5WExPVuW-@L;_Dv|tzk~>BuE1aeoI&=LMgA3WH;g(;1XrhuWEke*% zb;f$-oskP!VWlqM`ps_2`V!CeDsHDveszkQ_EVGlwnL?LKLe-nxi#t|Qx#KS<$%-N zx46qZcSc{K&A%2JORIysz~UpoyYqE5A5UR6NM%nr2#xdQ8b+N0{~jt=bq>Uj_Jx&k zSCm1AcvId5QbkME(&SSXFw6M?*beb*GQ|)$kky;q2it5h3Q#{M4R0$`fEq`s2={?b z(o{(5lCqg$C8$2guove^)NemnYc=bF2{KrdMIM$M4Kv~jLIb^L36tagX-Ag1s2B=; zx8BtfrODZCzW==?zBS+_NUYiAvM-1Qeh8~X&25`HC1PHoIPZz75@)7H*oZD(Edo4u z77-1{f{(<-SOC(1AvUvNp!yr#ekiCxgqp_t@l(sZ0%JN5m~5-BG~!0%9!~{s-{t|Y zU-Edqfs%)d0j|sM9_fnI%UI!uT?)( zQVOoMfG{N^C7=d;Pgr6RbqRJHyeGwcus71nMH8^K@2wSSd{A67QSJ)i7ni zHK10sE@85Q`n1@v5;uVq8z*B1)C?Bj4FH|HvM__z*oq zVs$VY4Nqrn-wyEGZS;CI6`Kvkf96OMecB4Pal6TbzFgn2MRMhlgKCn;JK5flU9izA z)X1R%7>~-xw&HZj_?A`})kCX6U3`zhW4R?y-z_xXuX84MFUMoJa5(h8M7o6bXanO| z&}q(MB+W)lK3jo_DHL6RI*t$6o(pDU13&V50Qe^R7yGoMQTYIH9(V*N1KnV>Lf0mG zRJ`vkI{`M&wgXUDgfGt4fuftebjTm>sM^g;R9Abj=yG+JkpLLRbpqMx*)OBnl;`!7 zVre}|$o^yvhTZZ>c6(jWoOMb87NcN!kd2GTnF;2ci!_p3{Bs=KR-1c9R-7DrdF_tM zE^k$)ku~L>b(~c<%AxZHtC%ObW_3yETsFiv&6tf89-4ZEvW}RmueY~{s{{#}2eWJe zw+Ii+!;VP;HR3v+!RTsT81>00Q>O+yXw|UyH1?60zvCRU=3l^R$he6JoXEt{q?YKg zp^=qmZ3e=$dTv~G^TCsL@PKlU8PH0zKq)huXNl~8k@g%2Ue=TDFny<~;2^dJPXLii zJt~m|J1bspjUn#cBH=_w1?D~VFnGUc6TLq{dPjxgz#LVSW-&_%?G6prI*BVZLW1vo z*=g4bkeuiS+Pw?~%m}bl1FOus)i^UbnL_P}n6l9!5QGz~L4uD&W_Z9qG=pP(yu4sp zI(!;S{LeJ=wqvWID2v-I1^Tu2iQ}DtPa_pk z;@}Q*djP{2UTJFWV)>ehWgT%Xr_*{rfm>_~WU#<|jn)o^SRqIehuLCh;TmDDN)v_C zx$)+sbN{1&cpDoSMH|y*X*AG)lZBh1_7F?0lf<-;H)}jT7t3N9PrVq|OoV33a#vPSujQbEVMww%4P2EGY)4ve>;9y+AVi9$?__DKoC~>(F$#gj78RZx-P(?D)*3u=g>0w5Dohq^?#$*CziZas}05V)kkznq#$DFk%V8noF zGeYRD;hkwz#UTHP8NzurF>XY-KG}%5hGYwv+>eOo5DUj4=>6Qq*ZP*sx?&K`_B>x& z%fhsfSTWr0A^c`NKnh3KNy5QizGk$LmuNNfOrDkw8f-RwV(Is5U^jsEO86kW_>x(f z{*?SoKJbK{D+thSj8jV+s=!7}Mt_LZ_S)JCfTzPH z3|zjX0$b{rYyo%9LaHV41rlS7>!2XPRB0AfeX26dhx7sCBt;}wQcgdrwDl!D0Pqkk zm&E*PM%Zn&BBF$WYPPP~DoZ?}6jIOZ!;Z4%z7|d15`&|}8R%m^=ssC^t)=8_EtMrE z-3mg(xAQ`zi70-&F~wUvKhLFw3}m5lsLzZ;>8`8!=9k)wHC}bPxH!y5+kv1s0-t)x zGk$?lug0~7<7$ZMRyeB%C~A~|Olih$xyqN(PR|7?%AqDIB4B(_LGzC;lFVDtvAOp} zGHOLnmJ|_&c}LF~WGO1r83D!daPKIt2FXL23L>x~3V;>Xs(Uqj-=4l6 z)-%o`us4($z1;7cGW~KcI_I9m+=?1YLPDquW2_qaKEeiS5OX|Y@xB+jL3|S z3x%HKn#d((gi|~I69$Rs4uv(6UChlgkL{1Ybh}f^gp;4R6*=k_!_-eR8oAnbX3xDq z$`;NJcbocZ>Q>&Lid3B{`E-{Ev)K3Belv%_Q&*wH7<&_?gB+~aq`(U*F-)Fz+J8<= zT5@O7gc*I_j-Fy6Gp$>x+~jL2?liJ%`)4J`zH;D=Rw)ZwTftMLTFjp?WAJGq|3ctBwUdb%FZ;F=W1KpwkRZ|-fd69f1y-~6IR8Td07K`u>QUeY}A z=W1G+s1v5Llp!vO)E>b=bKa5etUgkYH)*OZk@{?i-gxF-RZ8jLC=a{gB?#-Wo>r&D zPf_tiR%*pS2!gkU0C=Cs&+!OBVQlRIiGnPn}KJc9thd))!;mjCm|h`Yz!iJwQCbc zb_y0DvhF*x<&wSmlRVDV_4qq%&^CcFbwK}BofaCGk1GK)?KPgO&_^MMojRLS5!85fe^GbHl^-MznUn`|vKbmJN(}}Aw?#(42!=kHha8H4i z)})Y&M@-4i{EBe_s{2KrP6Y&(ihE)y#YwId1D++!!UKt_ak)t9)V23KMFyVTys4QHcHpijWg%Y zZeA+HeoE7cQz-Xn!T;V8S@y|yi5YdvS}-=6Xo{O58pl5n&*^iMHzuIl4?PcPq8i4- z+z3+e;ZVczV}20@w4QboCXc9#hgyV(bOApc_*<`dQ zc1SFe{VMsOLZq;{i2QVnvJFA1uyM%%*`B%&P{3VVdhI*wxn}0@x7s4nEg959+h(;q z3)su{84bqC;Fg8x9IT$uEQb~UiTa^*s1GxJXYD6;X7`o3lcPP?cmA*|mJWSV$tMSK z=vZ=InbZ?KT6vdfYvs;)t1^emN%F>!ZP-1wDSyEKc&-BZcr-G7Et(Yg0&Z!Fzm&-y z+5`m-1|I_yMnW}8X=#(S%w;neCQ*3WYg$BIIEngo(GXLg6%~oL?9CBCaghg~(=#Ww zS!FJhzuIEGQA$jXH&&=xy|!K;z*{9_QPjDA`rllqMNdkj*aC^CU4H~3vAOmrTwu|9 znn7fC)%O|;o@#b0L4bV|96l2p&cAL}CK>q7ko=dR`sT%mHM2(GM3_9VgDBHd%UdQC z-*r_Ju2cB53YSi7W!OBo#1183q75$9H_83h0=0?bc}tdARX8JUVWin6C@|4b7y)!< zX^32V=Km2L?Ne8%o`6wW>^66wZsw3vjH|5StldxF$Xb~Bhwc2GI`V+7u5R461#6s{ zyELI#A91#S3;5+CRw{2u#eQ~e`2)cKS}>4Dl?l*_^WlvRfi;#g@|G}EMY z{#fTrIMU^X#3Aj3v-v}IeaL7;=1u+_c%}MxnNqhk$dnqcK_BjAd}Od;uwvCl{}V^< zcr#D5WmSxbG^A9a0#q)7RI1N^wnoI%cgA60#H3>3S_zQY3J1gvJ=B~~{9>p}af(L! zK65{0JPcp9K($HHA_`8)^C*PT-SjWNmK(f8N~?yPE2Wg_kD2|W-(z_XDTCFy z8{LgpHn}GB2@Ezuhz5Zc8@j(f@9(QqxG{|nTYlzoP$z*3!Udw+RPi>Uo6iLBb_EE- z#>n^juP$DO8#f>1$$EhtaU6jCswJ_Fr-5YR6q$prDOx*dilDzU6-~g{kg7ycg5gWl z}$(YWA8JOt}b#8RHJb;O8?~qa(8fAAt^JVxx8r9W^@=@LW>QwNCq#hV zCPKp!Xvrqb6y?RAfWG4XFn^lJ1%J#56Q8<_P5_(8UcIj|!jT}>>m3Y2HY;4GS%L?N zYBVb{>5_Mjhwt6>ez)J}+b*&p91I)G?>yo5_9Y>rl-Uh*JDhSRab`MG6gIg4_7jIC zcr$JF&PiWn2FpVeXc24|N+Y;Q2ORM&!uvaDtjv0_P$k#ux<9vOka`0Ud(3gXe!RY3lg;L$~dgPcKs|5E%0#E2bBmG`QvrDqVRwm$NCqMV+CDxH{R>eKZ{lO_%lqn)Y6u?gAWHd zYSQY-hsEcMN}kT`$;F;3R`OXY5dt?IN;q-9)a#|NV}xgkx;`LXy71a|?)SNo)=8M2 ze!5lRP~Ae?a7vus!SqZiqy1&MsXMRB>F-R>{h{{LUWnmz>f7Y0F+#7$n8?taJN%^- z0{Uf=^4_0J7*{K!l@L20lgi4nva>?%HvK-rNN?5Pd=Yg7sQEG~6S*1zu>8SU320VS zo|l*3bp&qPM+7Pcl(L>&KMYz_=!czTyAYfjC$x0k4NtPmPi)=eE%t~70a9Cv0w%K& z(&MhD+Ka7+EZkt@4I8j!9v)gcwOa5j?|_{ROT6{8;f1gA-W}sBI_cw}E)6as!w$gY zL>1?Nr>m9`m$E9YX?p>1Hki zA_#IDdZ%K#3LT#&ohC3A5gt|5s4vZm?snlNJe z7(6m%EA(}ONg+-*Pq4(^E00{0Ocrfyf1kfv2xIny{F$>w))6Qqb!vVTz-xkUfDF<3 z;{K{jfbDgORC!NG%2g`YWG_ml{gN$)p^KDy3>lU9go#w7V?2irx?5egk?Xgqc$ekm z*_lp`^cUu69{S)Po^y8&WZXs0ORcWQkbGyzL40uvEZ~6jFgO*vbs9#?VDu@kwm z<$OocsN#+Q$)1pB87L4vYVr3rt~&x&ab(PCMC7B?6%~g8LAz<&>eO&Fl^>jDN7qx| zR%Gkq{!jfYvbfdKTHUf(HsYM;ZP*TXr@sA7fi{R8(&#`2YP1G>Du&-j({ltb=JF(U z6NDyS-NU6kfXs|C)8(*QFUA*4$3Rgqlu4Jxda+PYY$n+A(I1mcO0=Pitg?{8u+gby zc=MHFHRJwr*NdQ+=hO~@h&}*AY=J}_xf>rA-TRs}bu~^;RJi%cD)2l=PB~A+PC@*J ztiSJK0rWtGN%c@Jab-4x{<3f0eWYpq0|nbRoM@M)^QAAVsIDQNl7J5<@fb4 zKyWJ9&950dkq>y%4O0)}TI~Q_aR$kSmBC+;4&%QW2hGp)PXl9FBSyxO%CvE~d>)ue zXp4~N!AHvhUcN`9YHhYP13FX584VF`090j4Cu5E3LbeMLzs|*={7@+WEB1#LGt~tk zq;_8%EN^FO2deJU2`pR0d;6YzZ|;_B!~T?=*_h&D`)O1deXmAl_iiMqg}ZjR|6{+J z(e@!$6dcSnkU{xDfbx?i=J!em+qvOcAho=P`xOKtJ{E#}fUA8HJ18a%npQfQcv~$+N~5bvTY5RZJ=*OK-WgHl9v(h5Hf|iuuVxQx9uh3NdNP3IR@@AR zwFxD2QC&xTBVpEz^Tk{?`=E3Ms2i2(2Mw`zn=st=tH}OgLVxHbFGlcf!`_=-GntlE z=GPb#Eilv&jhP5)6YzW%)nCdYLRQ>(;C6~ixeK2$ZT0JY&svBOW4P{DP0R zK!CVvDjJw5no;Aw-V0>)9-Z5i2+AR!qwGdY$hnSfz_4n_h-0&_o@$r12|*k2 zt4g0)I`Puk+~-9e(lkc#_JYV4YTHq$`{sipid!)3jTN@4(19NHe|W;E3_0n+exk%F zDrd$7`P>hg zu=@oRvnXPG;kU^r1b^bz_70I&r-(3iFJ)d6=a(3Q7w$LClde_%s`h^>Z5w{oi%4P2 zkk7o<-I(y*uASm52fy|3rs@9bxlc7zLDLYCbmdggMAxFg&;Gkg#2o!AvE#w&M}HkOi=g1OYgcn0CgwN?XD&AvW2OP_ zM1-{JwqMT|YPpiTda-2*j881=?T6`|Zg1(Qg6!77*_sT^ShkKf6_$_w*gu_oh1h)#Kk3LC`0}k&TQL%5UIkTp$ ztgJrS)h+o08SBT#WHA*rRjguM-<(UXV@E?N%L@-R^V@3|nEX^TnPSiM=9d@9jvImDBtt5t9r0?~!xuQBI(ngoyFk7)ZLa3rE?oLRnt1#e z%Z3$Q(+j~e3fLj(xrBfXXqFTM0H~sMGr|nuaq2ijS3lYybVR=!_8@}q;R5A)WtM;w za8`0n*vHV**v^wuaUU`_8--g!>IR1L%#(Sc7U0=UHR#)j@Pl@2PO_)V9MUJ1O15Qv zaE}vke^stQx?fo{*B)(?H6^B+i7?mK+3*L~zbcw?P-Q+u|Gql^C5&{QOrBfc@lOA;!62zk?Hw zt@K0!*)AQGj(@QpY#=EiOC|5Ih(c1T((a;rf9pjr`tJB=f-q4%s-UyuS4bLH3|iBT zyZGrE z{0xV)Og_)eFq6!%CuJ`%xDJ4$Xj4-ZCEWC!kPY4IE&^YW+v2zHG1U@=6PdKGPP7J% zg|;|jg_GI;4o7#lN}OGRl#m6$yqr`o^z{tQ_SG5tcqJ0ACQhT&X&UwFmckub?q|m* z%U*w>T}&!!ECIzfD;VIfOZQvY{DjE1X?XnB$NO;pAbuy;RQHiMZoea zU3~>)dUSU)UfPywo~OB{5(*&{tIk#6ZhZ=Z%J&}@g%$(sdONaVH_l&AzhFf@zLL0i z@yTUjfiWm@>luRx4tF5Pc41o{Ep0T?w@vLm%L(ze(SQiLS*AV64NXN^9&0|^!y4hQ zi=kDhyi`WacK@r_Zg+d-Qt)a+GTvqS<7N%E}FrW9BNo)@Jo9C+TPx-ZBA)DKO^1lg?}=pM7S zU%2(rJftw7RB8O)1;W=%Pto4B3%fN{R(GJp0W^e3%faVjtO>;*z&tPV3HRV>Vm&Jo z3ZCb7M$7RV6?fxVdOEI&K#HI;kLz23?-@g?v}uYo^?+s`*U)(g>UzCTc-OGn{g4oS z>mko}`|VG>8ip5)If|P|Xm*TNuOQPV&BSTLiJ>5iX4)w;9};3Nb}-uWml4})K0g0k z9`ah^??f4;iQv141?#Redf*3tb*%0t>^@`R^C{g3EUbT~Q1u($7leRYAzI&fEG@Fm zuz2{mP(G?MzxvK8wljgnJ6L|U5|UZGTzBdhv*;f#7y=ALcA}Vr-tRW z+miYuWx?D?b4i??UW>VkqpMyze()S$>COX-2U^BzcUmkHmc(lG;J?qyx=bXGo?Zh& zpi*^Zjw&POSlIz~CX*8bfBRJ8@^(Kc{s3(M5{Fuy=zuZ{Okq_2aIxXD=7~N%4(cR0 zAiH3tPQWSv*TAr0`|DvVzO=)e4yeD`jNc*TAi_M)NaMWORRy7l8@)vqUQg2@i7QM@L z5FhD~sk2+)<^MlF2}a6d$Ju}0hWjRN6L*wy+O5?&*h=R?POfaPE<|d?W-8ufw-|qJ z=e_YmrBNZ|nlIb>Ck3`VjCA-k-w9p z&-;6hm*bD3c1yVh`inbNd%B|P{X{+h4C4aenRYcWy4=n)iM?#k1GQ4i*&AnN&~{86^btcmzdS4PJ6B{JyZl*-eAzoIe-@)M9BS45TYm z>jY~{joTB(h@0GjfbublkZl9B4RBvLn3|+!W>UU}xmXEOnZp3$;|W_M4vC-mim8A) z&4*7+lKoXCxdd0-Zt#A!oGf6f49Hs6^C0#`WZYTYI7l)(IY3und>hIXL-}ZVEx7dG z(bNb&qo;qyOBI<9^i}rc?d+}AY5C6|9hIc;spDqkKa^}z1V4RQ)WPo(Wo!yMmxUHo$352Rm64yotr4xAlDo)Xtc1?-AS_In-n zo)t#B$`qP&CF>^8Jjl^pe%8Pn%ApA$*$7gFgT$xnI&HBc1Zk6zs$@&DxCK)v`TxDW z@?-&6545$VZ+(ZCodUyL7ej}hJ^meL1!c-h7UxK$K|fCOymE|emX}N-Z_$Lig#}$v zvD0rP$|-U$X=Vx+09z?G&I(Z}A_Ek-d8Id7U2tfP9xATR_lA`HDQM1?S7F|ky4T-i z*tbwJ!y!B)XdM>qw+7WZ605{C%MWfgymfDJa)r3!gxwV_Fv0uzNLVGQW~9%Na{OBb zfbB2a-g1sv2)9;o)O{nlx|8mHK!6XlFLmc3%Uz4s{EB~_{36wW-7Fq=(~TtaN?4EM zED+yfuOe8l&D0BB5as7kgYsQEt$U>9?x;fsy+O)Rg6^;ArtL%_15q%3qiJxj@+FpC zk%yg&s$p@wzY&RJD>_5d$m>ZH^XpZ1&&Z~st^SYaY{{4Hevy8EP>@aI40)+s9%Ur^ z&yC;~c8+NtAttv?Y1OZI&z)ja&mj^C-Rq4$`2KwZrwhoTx$uztF6`QhVq$*(FT=L> ze}IF9z@2ejF{z*ggMmZ4wzX}=l7Y2*S6Gi9dm)}2C*BqGx42J?hT0PDx^r10U)a_Q zZid$bJq%`%vJzm`T6HC<%<^-SOh=JCag}X{Zm*@+;GjwEExNh255(6Q+gs1V@G>0# z2dlE?V`?3AtD8oVA0P@ya64WR&WH)^R3Z1fp6cWD_aq z_AiI{@G#ouH%UIVT7c6u2M-K@MV{wWTc z_75dP+q1Roj^(1c_AqaivavHmSYKf#(8mnkuqyA%goqmpe>9OUd~PGiqO#44Di6v^ z2byTpB_cHs5^`UrNIF#!Qm#*lY?sRcdI-M=&)4!=y$oju=CIeY0O*{6C17MQO>{m= za+r}%Hw5MB!iNyuq$1vUkec{Yt#ZbP^$)`(_tA|sCZ6hgFuiog=#qWx=r}1Z^7tvc z<)w-IWgD^iOBw_5J#OS#B1&i)1PhW_7euZ!!3Ah05H~iyoSZj)`2tr2WDwsrg&isF z{c>~M)y!svwzv`b(zYC=ANByPu-oSiqGaGl?~q3Vr#3GVrG#mDK3B*@?;7O&O?Diz zxWxj_LjnBN?h}ini!t|H&p=MO#X$TlZBx4Mv6F9)qC~lvyO0TDfLO)1h+225luYM z70;j-M4U}9nhZT%--Q)p1ND(a*#kFNfH?YE_o7zV-sXhi^ps{0a=4d7y6au*kETlA zX?k}_|K+)$P{sEnOEKviONk!MD99%Zz(@lkRYu&Iy`m5fh4c&BJNYUE+OGk6!&^5I z)Se)w%=$i|QS`3$cWg_BIn?Ee@P_d+rij|7erIv#;|0F=VsuI@QH@{fMM=5q2ChFy zRbJ%OS_>GlR)f5x*=HjorZ-$xy zKxDTPGziziMHTGL-|(KQMoye`3FtNj-IF;V>puR$DRLGcp(z@@bnS#X@QX~|MCDQA zV6V$h2*;mrn<#Jbx^Bx54D3o^PsR>^zjE3|B=htBRHQqNZ0Iq~NVMjbw1?2I#gl1Wc-Zp$}xZzKE@^VzY9C=sCW}Mm*2P)^3mw1}Q}yTpa^B4YKEvV0#|u_^-uz z!wwsa4yBjeJ8H;~1@6Q3dmpf&AuznI{oP7=PLb{-W zD_mX*f>?M*K=Mm%MF1-lDFz}*Uf%LpYH#fR5W^q*%Udb|k@t6IkYhdABosMR2x67- z^`GP%Wr#c~feOGWNfc~|0gYg#^B`i)M-N|>Va?<}bTS^du~2G7ir=_;f(QbzEL9LE zHfAQ_s)CN5xi>nB?=0u-;FW7w%mQ?)%AR-E{HErb zyd#jE@xXv2S&Ah#Lq=QWhSJPJh)}eO2l3QbPS`5ghvoSa&BjzDhsaoi0;=dceD;4a zL!8iw%NX8#em6X0Lc6T76`8_kbgzrEA$9&eh~MQlj|5XqSqU>)?+N{6800 zH@l+FK5R@$6-wHw%2(NDJ_Z`&&-G151JWI58tdmEHnR=B?9ds=TVT66+0<`#x`A?8 zo*cTB4l&X?(e`M|<;?jDO=rG-Ng!As-8Gen?`#MgYxU}z=HsL@GV(T~ZxrtxiJQ|q z#ay>jM%{7@xUMeTCpb&>%CiVhT#~$=U@fs^Y1yBHE`0AwZZuX0OuU|VHBxN6W_=6Rs|p{hD}nsKF#SLr{Ulb;l8| zvYus_CU+YCv_}GHN_4-WhF9-CTBg%J%#z*R0b%-xLv1`&t2o+=5Iu#X`)=D#*fx5) z`wpA{s?d#aH#@I>>nkkUG{4?i%Slv2DAl`U3k(TSSGo8f(-}kE?iCHI)Bu_0e3Q6; zlD~Myvi|M7wbO==z5S__V|BWYv4FgbYm03fMA;n$laAjhN+96$1c{&3AhtgJhSS07 zN53Gg4e8wNT_TZxW>5&KzYQZZ3MJg&#=ajZAayaAMCtOUNpBQKCAr!An zF>Q5+>$L?@bH54H3(p2bLX{Re_pmc3kGOSjMdHeciH`8GRNzNODG0I<# zbdkp~wQjPPNMdNl+2eN?#5m;>m?$+^qHYbqx|y6Kg&r|Fe-p~5LnOOz^f4!9UnoEe zATUpig5XZv<5$dUWd<~KOx3TAqH-HIy|q<`SvWC((;_g#wAcHa3i3I|CNFYx>=A#{ zx}%BNE&0*@=!^o9x)=nTFiD?8lhU&%XPjqr!+hODfgK$@$dlqqb>&dk7n z>2?pVvLH%$1|OeX#!|Q+Ul)QdU<8Oq4r|fDs)6??8aybxooH~}=e8s}I$S!POea*o zAeTIz0x+l?;_$)VJffj7L3A;BlwKG_@z!dC&KojWMG_y zAdOp z8#$f9Ye{=uHQKNgXgd4h@O#sy0RuNL@1|Z3UumsYEf~hTmN9-hIRB->1=|*#8BFW>dA9ELrsHumG&*!0Gj91I7@pk8ZtRB|EZrPtqu2if zGQ_t;??^3~O;$MLg27)#6%&g7t>|<$PmBCq)9zLDx24_hQpr}Uas2={%$-Fw@gKVo z;jBmD{$RbT63vr@Bb^$Lmz^~!QUc%AT(q+KGD>>z>4^pIlR-Iecp4>%i&GG7{EsPe zu#-yP$N8_uL8;(R93~E2Cs?9k%Z#9rrZkc9vrn;B^wJoGKZ?o2 z#Gt8RVLl|W9oNI0kkjuwS0ns9;st9az(K|r~KWDK$LWDw43BS{HpYA&ba z;gdrbS*#D**GNMwB5*D|>UhPb<)P4QWag;02#CCJX?$0|D>4(t*PhZEa0nc1(4mK` z@2gU1=Rai6;97cSu<_>36joKo@fB zFQSjoaQ7*U?JqOVOP2TdUav7v=xBh?Qk{^fE&w~9_nj_OaD`uhTekr_Y$hA7)K79| zhJsjXMU?&!qIqBo)&Rw$QWZ<*y-gWQpOZhgat9xCc~zB6B_)}1WiHRC3qg(Lkz zHP15&$JFohoQni(-VI6bp{5Wyi(=RA!N|ewdp?X$e2VaCJ;*%WF+%J^tfhkQ;sCFu zrFJN1UM#tQl}2arhKx7cqxoTUBf+E|!4bAZkxYl_3Lly+|Uie2?5<5y1nsLn5v4XCDJTL1J-a>o`o` z?s-EnJjc2TW!DiNBCWwMfSiW{oeLz-GXz&Kyiy&wnrE<6NRuxB@!~Et$mU12w%aoj z?#Cj3V=inGx24ms3trxlZc`t1xaAI=z98IQvAYT6MB^w;6TC-Np4}#i+YOCZvpV;N ze|Uzwq?`x<_b5DY4;i#Vi*+g<@^@Q8rL3yWGPJKZ9eHD5W+G0)xtArLL-zxw7?(g@ zw8Dts))JHt@hqYo$2z$aVnf5!PxWxZAg#AjK^OK8gdX+dX;GgVQdR6pn9AyTaZ;uY zC5pMaJHE8itT5vW<1^`_)98E6F;L@Hqom>ev{ZT&7 zd5Xu*)LeI{!*a#ZV4OPfHlptRprk~QR@s|-+ znB9MlA~+!f2e_59AX~K9Pl#PD76S$hS-ge*=$#7^4dqPG<2HOwdTG$eb6g%S-oKKu zNU7aDO9;1ftt>2Pl>CTQ;&e1-JRvht3)(Z4vW!G@V#?qNcyM7t)E|huw*m%y#}u^b zTq`Z`&}Rf8vFQU~F)OvGtTgr>3ZCl|hqI?5mI}}(dC-7LEM#&*DRbk)m8{r``aIUl zID(!H_0TP}Aqp)bwbDGCW5^dse+Ej`?ie-Nxq8iDcjdU=A3YKAF8vQ^qwwd0a&DuX zGO-d{seIyTN_ZZSxM4Qd|A#Zp$xnyykr;}D+mpZ4YhWpI4+@F;B3Roir)OumnC(BH z4uds}UK5L~&&1Y8oh6PiYx7_e>CbWu*dwj2}}hcwuorA(f2PiSZ73~ zs74d2Mmf(La?WGaPWx+Xm`z7*&kFg%)rnZPJz?gZ^0ITy03`v)5tYdiB0w8R;lrG- z#pA}MBgAqbm(*k`B@^d^&iQ2ug)>>}HZ$@N9@?dyqB9|2)dSAQzgafYJ63IojHq(J zj8EF8rp98GQDI3kcWFC>SI}419d_8#Be;olD6wz^hWck+FsHUgLb`@{$@E+z(7fbL zTm80K1qk{g?-6**?G(&l4E{&wCy?7NptCxzT-5KNI8X?1s}QFzbVoEzr8+b)+*O4xTM ztT|Bi2`v!;r8gR{Tx1>3k#Ikx*KCoY;Xk@}P+%x*pyU21F^beI(G7LjaJ$e(!;sbZ z`8s8QsH1Oyjfa%Xs;C#wwyG27uZCYSl7|Y!K`c$BD6{m8g%kgHvnb zrcIVLcb}%fqG3}^bVX!QJMrq>A&d!5ji3IV!g)+|{xvOj6ix^!kt&1&zUTU^BzH~* z?bwGqMr_-xCfbH%{JG&){B-OmIk=GWlpAqC8gg?6|EGg?V1v{EBQR5~lyc2zKY_4w z$TyeZSWb5bA2pJeB3GH_pOrMo*>f3l8hV;3Eo@zs`akN!I;38ebn`Clv+5A6ye$xf zFDhsLri(Lh0!LnK<$u-LA>ql_6Yywv>|)hP_W4!?b*-y87b_XvQ7jE|f{A2|2Uf~@ z+L(nJ{l1iF_e}#N;e%0LcE*0F#nqZAw3ug6?%oUj;7TIq@65^9wnZGA(vl5M$^MdP zxc24lmX)yCv6=tshz0TE5E^cPXJC0G=W7kIczJI)8HFS@<;muTf$#q>BnTZHWV|y1 zI1Rf)YYA3<$P+LstH1dYvcevcG?gBieRy0~RsYi@GhT836SmL0cZ>=Dc}`4Bw!wevRQrZH4GgxC7*c?q5m$1LV|Tk2tEQZ%N{)%ITq21 zjg&!IWk3FF7`_=DZiCwvr`)>f8N7%3AyfyHry*B8hkcTk|I^whe=#**n}jWxv2SKyA!y@N!|4$2kXQ%+H4gwkLnxR9B?}@!K2*3Tc+VNaqK2EEydhoc5>;z8G zh~SHEZKP;%8hqR>O4YgIr|wIzmFIZq?U3dhe=k$1ZB)5F8k8NDa%nsC|BV4$mvLi0 z3}-c|Hq&HHrle1C^hhVcQ5G-kJ;;+Z35&7nr&)%laa`N&5~jS6AipT)Y|9lTmyM&SMN z(Wn#kSR|^usV{Kma22@4t5wvKjVJh$O$lBj`5{Gg=$pN%lo^t>l#&%DlE6aBZ$Z9a zA)VbU3iz{8+2v&o0K>%+dhw#K-+E_^iHt4ziN(PZ>OslRysc6ZCmd2dOv{c7$qo@Sz{^uHdXjs zVG{>bZ|VD^%7{45%Vr??B#HI>7P{f^61o4gpCp{go1Jj8Qc|on3ev`Lk<*X%gb=%? z_JvF@_1ytstUeGa)_gmdo1i5j#96K851(AJS0mwbyulBO>$>no-ORui08v1$zk~Ny zOr9uy=B6G;|Not&Xdn*`nftY~6OPh9y64z2w95=r=Jk5%5-T?zSPNt#+2Hz!W%f5Sf|X^4godM9aTRbjAS}FJXbkz~!8q)< zT?3-j@-EEYM*T`;**`QMI)f0YhyK0Dlqx8JTRez%NwHS}b*LAo6 zq2i|}5oKQx5fMRc(k^vMC*$|dL0^Lh$;Wz_YSQ`{Jci8`q7|dkeLf7n&`nL&Fy<(5 zW&cB(GPryj`Tv`J3?y-|KOOYHQn|TNcDW&GiSm&o@s9Bu_)Wc!-I0w6UppEqF3;R z(wF#iCh!@ULR?Zs_BeC6q=1Mmky-#0%MA7-At<{rR2bhl#rWR4urN)~(l$bafMdna zKx2cGji>+u$FQ+vh6DvEDthg0tY@|Z-r{=Du(kG7J&hwSFE(4NIb)QQ`rKD zm7*9ToG=k+{+Ai2Mz}8_Iuqs}ZHIjVv1v|@|46GC)WGI9^`#>8t2I0SW%zAzDcq_a zIb84C5nW$4H?>?mnX#wZMn}gC@yyvjogQPRR}k}aVPp3tUj?rKV%en?VQxm%SEQD#dY%s@w8*a8v|ud(<^g9G8h(+Dm~Yok$wRc!{;=MPO~ua5xTDb1 z8%Pq`28$nn?i*wff3(^X2Bg0RFYT>A;1z4TO1dfOKDc2=QF0 z7lDRAe#i;lH^d1*kHUOy9j^5Sr1$xf;!rX1vTQtpNJWE|SsLJ(^tNK(xlk zbi}{2{-1bdA6ufL=2|DoadPJbt+zI|m!sN7s?RKBvAFt$-#h73!91-Rb<><4p?)9( zi~yMezF&=a7|1Iz+|zZ2YL5|EHuQ8k|MFLll>r+}k1;pfcX{!(V zh$~O^8mt=VK8U<$EUQkikKCj8O7)@siqy!?>~}2q@7`bm3ruF3F0A>eql_1oY#mj{ z6DS|==yFze3drY$cld9{*S!&Ea@0+J&C+)pzY5(Zc+wXt=6fq*(;2UNZNnG9@w&$K zDN+vPaFS47_CjRSH=Wf6Px$)(SP<0Kh*7iT?>#UkcJ9w;l2aih!Gb{xPVGlIujusn z&vD`yka7-Sh!X9(3_oOEt%Kfk*x48AQOmog4ge#q2kyjJxYV>&T<@ESYGF*{gup^K z68!B;aDfk7giPx;U^Tbaou6~SF_Dkn_gm-rfI7UFu+SXjS}c=maa1Bz} zt0m^LD7G&KFMwg;H!y0_MX*eq|3qZa1v@Fh zHl+P!87mUp-2?7&5vG<^K3>M2 zL9d4Ab7%**IM*i91Yvz7SMGj{Fq9Pb`0Z$QEB-- zbNq+YWlsj%WP-*CUV;IJ8n&pbIYQdKfWH-R;gA5Zi5MHfw-N>8kr_cw4r6Mpvxn&U zpsX40rz>QRvNK(?P+mqTS64o|)1ZNu3@X78bH3E?5lOGPE;bkSFab$M1?NO$`;t=;FyN5CB12i!W zlzr&Uh3k-$l5*}mq(b~8!#L+V;QFf8ID{TT&!vh%zD!fL5=u*Ru&UD<5qg$ZACGxv zHe>cO`geD4{tJ!Z0hvPt-<=R;;^I!Av$b3iqgog3CmxTPS2qlx<@0m|fW<^Cj~|jo z3z`Kh1wt~8wh-Z!B}jwol8IE*u{^Zgk@LN$xv!Xr$H$i`_CB8f4gs=JZssqwPne(a zGk|7f`Q0PB0sBEk1WxYDgqV<7oD3>c7<0(?AVu+v5jDYI*aVo|*TutmTthOU`6N{f ztZ;+sDHovt3to#haUv?r=7sEm%OleiS1Wh^XLs@FP_E=Sz9I%^PE#7-LEmn*#xkR) z-)S4e~0ylHIw9rj6t zz7Jq=_PXgDAFuvNO^NFV4r#RsVAEUqom@tl#jxA&e-$5Q z3o3w~Kx}hfuCaSvgulV%E#-N44DPH*INj6BkXmG*uiTnlBIr%R&;`Qr!j& zE10+PN8fIWy8YNLpwg`ZP9v_0;WI0<%PCyDTD)`l-E$R~--!z%m)dw;j^2Ekq*WNd z;GnJczjW+PKQFrHfclkrdah`QZF-OXv#l;TmgkTU?qAf9gkRInAYxX=#y@teA&YDG zV>H&Rb==tSFhSziwjFQBfPv}$4X}~}ue4U((mftfIz7NJS&!gJmwCLc;AKi^7o&cXTGq+DOvYayp@k_09N0rx2cvZX5)FK%R zGSCGbJBKly{lOHxy|h0ARR^Fc`0_TN%-o#VW*lWxwn1Y$fw|Kr{<(Z0GGGhrYXY!; zLR&D1H`yYpeVz7Zrs_9bY7lpfQK}_6f4Z8<+Z;Wwiq(LKy2o1hN#}p&z{~-!a@jC? zC@vGyZJMOlQAcWOekjvIh9ldiP^&|P(MIi0soxL4uP@YV+eFO9??Z-9kYoGnkbXVB zNhuaYW}c-H*2Fq{hmNKz%OaAS^~1Q!k)#Tc+q{mO;LFKS5;W;oj#9dY2iKFu!6-nX z$SGyp3H>c~;>e70X$e)dM~$~OAv3w6!^9&*V+Q4bBq3Qrvz0WEB2fbLiOqxPf;&OD zFy)C7B#^unYkZyT{H2~^j72oneR?ydLMcQd9%NOLkl<0)@@0^ zTO8A%c+0nl>>FCM^?A9y$0h1(YiRt@T^0N^kw3#$q>&*a;PmT|TgtcZ0?#mkceUYq zy0457#-gpgiT^S~LU3EhpKt}Z`S6CuZ&XrEX^O{rt&P<;HuI#QCsAsMWq?G~Od5gB zb&#KVbTJw6vN-|NJ^U%$OK2}7Pv0(=>za|XglA-|D%}51`d-(#sx%Ksjxr$+btC4~ z!b-oaS9cRL(Vm7uXO3R;h-R2L@W6M3^ZH(AIS$BlxyG{oM->0~PJrDGkZ?txdB%{2 z#1B*C?T;-5TMY;%ajZW@ngnIf0M*5h;u;%n+T0!%O7r=eKDSaGK_!xgO|DGVl?l8j zXA%lQFHBWwo_WMR`1RCH$&6G10H&EmjG5_bZuw+>rv_L{otATsGQAS zZlE1igRMOZ2>Mzr#^lPliy=P?Vd-haRW0YCA9e>t$iMcZR5$%Z6ZF;qUq4h_&|jN? zoU$Es8`H|aO;3a(OOg&eQ9F~BeBfRmpF9S4K$kj228WjXX!CK=1m?Wb5Y1>T+3vtR z3~uj|on^hA7-^W&MQ@*&rp8M(K6U*Aw-)!qzr~&$3asibHm&-W8!!;~2hcX65eq7# zz=t}DH+HXQVawTmng^>S{z{zQNawtllCbQIvZ485St==77&X%T`Mikm$h0sz99j{1 zGMXtnGXFj!5b0veK!#^;op zWEvz#Pwc0Knf;|xIFpUV{Mn0Rv_;-yPA<^cvcWb99QS}|%XqtTf zN!)KhfzOb=W%}?cg3*7yyoEELhQ&}u-9rlPwPY8LVE!yRvMvM$n(K6<;YHyj(NJOz2uF= z+SC?4CP0F%Go9HYl|-aWRUyy^9%h1Y76|9|h(Do79XCN;kUPF{TXdgR$y-5DLi|pi z35L=?5zv4hK@9=?G{_zm!He|tfI1~VW+o?|n$ciUAI*-(1HD{!K`QX`LT4P&?8LF4 za=K6J8)Ek`CW;OOMg}T^#-5mHwpPlWX-a$NzAs`h-mr@0cB6f+e8(~v6{Wa_s1o3N z*a2-h@NpQ|p8$i|20_3qs!u;BzlVzy6~AMnxocRO$V?M^?NM!V?X(v+bO!?xr9U{J zELF*{;-V@)$`cJ{O;>do3np@Q97eJEj2v8=%_H=U_6xq3PW~97+gspjxS+7P0%_4f{*r^FwKQy2g>@1UXQUv1k; zso$)Fp%_}3^3RaoY?*>3@u(FTo_L}ACczRZnv?yobg1T2H(;}sm7c!c#6=84iO%XF zJ(v*idIfZY41}hFq5)?=-yLW@3Nj{FI4M+WFQ0e}CH>o@$xs7ua9Gp&Mxm{9*NPay z`?kF5y2G1LRNkdB%_2hB94*LXU$MWGie|rp78x$n>0s}WLrZ?5n3e9 zjHA4JZ$Lj>ofn^wAc}xKP$)VNYHgd6ib&!wN6t_Qp9hGsl8W2xwIrYItr*=}N7z|D z4Gv9J@x(x_BYTleDIR=eS^dF|afqHckKHhDRNWYrn2)RLYjuAslze`qbTKskMVmT_ z0Uk}g4I4Y~0Id-Xn#vHfO1CoY|8r%^c8#5tGRunB6z>VHnF9KLa_w%E0t-{q*8PI) zJY6S<%Y(%2=8ugrY?Cii&vhiRC1zP5apT0_j4(#Z!*^v8&dmQ_IW#kS^?g`7#VCah zw3cWh84`K^lwH)1&huaag6#u-i@C45VC}utt6XFYFtC+_d8O}Y6Qd~Sv{?;lBNlOg zm!+QXXQvGVql(uI)>dc5{ZbBouG9COWS*~@T!UOxmI~SEOK43k2hx>St{VT}EOR?p z`9#u3RmbbpUA?EfUa$at74r6hfRc!zWRL_6+m5i1JfRO_M$t(pDF@!V$;%Jv^2j)? zSOrzP)VV^A5T?9jybm`&EL;%Pec|d2sC=au0o42`myag~J$#0DbxPmkG{tUA&7Z>B zs>D1L6+1|#wCpi18yd0~dVhj8eLFgXcLe%5eyYs?^pcsXG#m!vUdf|ClJ`~Wj;v*| zKQQkv%c@QO%YwRDm$k(I_gpzCGkthVlWWlC!&K2tfkLd<$eRfC*qXWk*C5?dgk)oG zdkHXzkTeybEui)VBCMhbYG<3PihR-vi@_~j>lYiTVH=7`k_%jp^+uGMrV@ejdt8s_ z(o#dIe4o)7<58=yJD(+lFT)9ukGy2irZQ+B4%URJP)dDwZ_0_jPBJ?!2`7US3c&{h zzM+Egl-{j-nx(?AoL65=8rN}D(kjHBZ4g?SS*t;;)B{P$0*|o@N-8td@v?ESFEk5^ z$;srnZ*MnX(bT#GAkMFlLV_dZR|INQ@ikv}&|~!hb(Na7!QAw9u^fLH0_2@$fz&rY zJsOtEb<>1)NT(?wO>&Faep{BGLe!a?k(w=EM6G@*5|}_eREPd&eW~?fzi2zOMf>M@ zS7@^_-50zpYFDys+J!*iRaFUnH?Ap7Eu#N-ME>Ev#zH-Zf_^?NKzME-R zwAW`+QWXRz!(y58Tg?}D-pauz%n8FV!v1cqEPK}tdVDSp1AsL$cLu-*DpU55Lhv9Y zqyaGzIB%Ol;f^X{nB+m^3@GFMQ?0P8*M%^^{!0-S19Cx?@cr|ZaiqXhiG|J25Amzt znKSX-zj4I1OoXTv5l3Rf^-di+WbdAuqX^37r?f>-Z&Mr+{IP~tN{dp)F7`pqI)I$A zgOwx6uPUY$501{?6sr!Jt37FuMOE}0Qw4q+N>uhpx{zTep5DdK6k(ALGKhX+FKp6L zx!lwZaVzm_Kv7X}pV_dmtrt*U+Ol@zeZW8@MC z6NJ0n|*aPO0-R?EUtcldJc0}8*L35QI70GOp@|W z8L#9p?wnK$6Xrj#LYN&BeRadgXy0M=*L`G@S7E|oF>!^kpEdW=8wP7KPsM$=q<)K( zEZWKIe4}wvF9MSmF-iJnzX}i-LNxeR3Q5+Qd#DY2GYB!`&0ODk#8VD9HJ5D;tmG*OrIiEdoeB*yLiXFY-gW7-B^1t1+mq z@gEa@1<+n7`}VxD3whU$fr?2MOO7e{bWi?uCRrKE+yNBWlPkWspD@Kfo_GH5eTiyN zbt|*f=G^N}5SbF|jc+Z95yR$}fQdoamk(2EST6_85A=G5<%4N#o21-GYAvKb7|IrS z9Dg?@J_CbUIkNN0!b+X@j>WmIp`2;mzcKDZ-JltFZR4u^ox&5`O0ddWq+LE z@0T=&sq%1KoKLj@EO)(^dL(4t%2>CqM-Al9jG4qI>DJ)BL)A=2|Q+ zpr8{p5G;@~eW4+~h^{_ur`>nYy_N%fknotFsot|jMNP2EU3KdUU~m9|7`&e2B?)bK z>RY*g#gw=P>27`h7Ak#r^5tfZLe3Vhl&I&!*4lwesAXZET6@I8nMiheb0 z`4Wotk>VPzZNcMg%51BLN}A3F)Ec~gu^EWtpctK1RSC@P9T@6(N~?%C3owL?D6Q_O z)q*$G#D5si)1-Li2trqjJ|VCx#^kinj>h{AvBh5GTyKGQ`7+1`V;;@x!H$~pgQ?zjj7b3&iIsu%P&aS!gJUj_ybhr+h_(t0W95I@ z4@NkYt8)B7l)}U!fCX)$wb8mmxA(>O7Y2-2(6c-r4EXx52}$pld?-D*SwVa&an!&< zN6*g~_(R_X8UfdRsWHHI@Iz`=<=%vv>Hu{~DtRQ-eu4o(t!%=Xz=FQqdqCobb8wKf zaGuuKOp#4?rKQuO+ct}Vd(3jgE>0QH3glV)eio~ik_i|o`nlzm1zjys%jDsP+@aZD zki>j{4us0-U+ed}?iJb(x!;5cW8iN)~9^fJE5(~iwVbAbZY)Kh!awv-rSNtUC zDro82%o?u3F-Bx0Vqco(<6`Q3?4vX(H1Q;Y!2hAn&uZmdh@=&5MPp&4 zYysPVU|{y@7S*)p-%VLLizO2b*D#`gO&Q72^+B9ctS1H0TA;n=9|is?(sR|V(InWh=3xxCtwNY zOWI({dy^I@mIm8EM>KSGO~tq zPGI<(Rzw&=fw8us8GsgK?y|7_ZOkYIg%0sPm($rGVjgnj$|iuxE(VIV_X zK!#HfQ~dR5a@l-GoXhUXkn`0{Md_Yyx~Od16G^m`!OBC?pw#^Ss~*t7k>^@C>YWYK z)niu<9JUQag#}q7*6{6|C1op`zmL4co&R&p{8Ww@tAb(*^@sSgkcf|jr!%gSiy;Dc zo5EaMBO>(F5#)%<1u0-&(|3-QG6hTY@J@{vkZTr&l%8t)#dknAb^m!gYav*1B(eTY zDC;P@^2`X0C!@K*M~(%@6D%(*FT!=XvUGx@8LoHSe_%4+5^`-@w{oaypbfE*f99P#27>-9yW+ z0LIkpv{XuVI|+SibkjZ6r{xLlzE!z{AXw_&CJoYlV&o=(59EqVRfSq~dTTF&RS}f- z=!BsJ16^ftKgBof)=S!Scp^o7Fc(+LZb>p{gd&O8YCIr<)P9 zhnBtI`!*%mTn8;-P}S&a4r_@)MFj8`JF|=v^N0*Uu@^(pYht`jy`*fWET0fnc=dX9x6LwdpeJOtvWF(#zk9Lts{;OUQx1Q1*Vm2@NSl4E<1+)Z`q7qLThB14fag(<@T$eB6QtWz z%rv3nF1^2NQ!i4S1l)fm2Lihir8!m9cQp-jwg~N)=l3}~7&(%egL4r(Kb0`_#7QKg z{_!TZI#xk@hbU699|Q3aUGilUXD=5gwQ_VRTJOtB2t6`V`DO|dcKD;Tjs)N{0*i@j zuRAq5zRH9OjTcL_+s;JrE>5?tWFuf#lQ_wG`d^|H*-1gk!*37+d{GzK&q@`3$JVM9 z%8}PZZi(5{KZCJ+yE>#Z;@3Ahp5Vd{MhAVh(%ToTT2RC8FXUT{XvAVG0sz7}=G;<~ zw)vZZEG8@c&~HGd-sd@xkms4T@ML{B4LQCQusBG#Kk>1!u;h_ z-8AC5Q$l@)l?o?L3=%6gHB`g8c9(t1*I}%gU_J{7$OQU?IO)dlP?XmKB+8=vg6Xerij;eBw+tO2*?k#MUh-Ja;1AJZ*mJ0Tog+#9L{52Mz8KM>fUT} z#yc2@0o!!OpL5UgHxMuStZb}P$a;dBV@G!KlYpvxz!_Oi$0E551@czGb|KQ(za3y48F(-GHgFSkZx;9s)=yy~BJT1cukcw9JX4 z%|Ksf{vLB9Jw;TJSA0Q+YZ8$Xtx5%>t`|282xr-?zq=0^f_S`0Oec+-gSlpc4_`S zbQ@C*6Sgs@X31%!xOL81BQ&-&z0cW3co6Pn9D{!MF7CvOV?HHi%ZZ{WG;6Md;a&Jx)Gc_p9WZt+{4jnvt?;(VysMhAl!psZ$5|A~}GQifK!PdWm_j&@=t;08r z6T9lZ(H;u=31P#b1w8oWUJc$0K=~l4TDVh`PrQc*tY|=)Wt7do-N{whl&e+40`f?+ zBS|`ytHWCC8Ot1ShcfGCu#Gqph&+W1%UkX=jSiSpwse`f9M7H120MR1t=P>L12X(WlvRCk-A z!4&$?4$v(PO{f=k0}C{KuHstV>hEz8iy|%&v_Nh zf!FX4tFbqRx=k}wex%(JL-Z62aMJBFxh5u76jyU*K*m}^*GgRJ?iUhQ5?uS!TQ)U* z-W6(_`)K7T)E4AiylhjE@wDX~8wh=i;veh9C6sZ%fx9Tw?zIh_Guk({18CH`Z5kA& z26-Hrt=+1D{^B=)V(@1=0qws7r4x2GUsv*mkbN@9%msjfugE{GSLtZpRe0y^uP2l< zX%7)KCJnzgb>|&dFYMd!rZz_Wr(h$owtM}*>OF+(L*y*>jB>rCFBL|GXJSf$5?dY5 zURw%-%(1Eq(yEV6;7Vz71-oPw-mWFMjTdJYPLerzn@<3KM1B2sAq4||0t+k7zGQ(8 z0qEK?#zLz2iZ4&+3G3#k#wDzM69?Do!#S0uTNDf5(MCLJKZbJICq4Ap@CFMij-Y3- z_xG{Lo*U$`Ub2H26H~vg8upIr-qz8O`j{+`mgHmsK|C!ZqEz8^r$C@UWYpP!`Y%Ax zlYz)bjS_U$>u7)lSis9#+KWHC7|L zCedAdt9az@o#01`gR2CwMEvv?)8F4)11<96AdMdkNv@paYtgoC58p33ruCs&y$=HF@Cp>AFj3L=W zq~XWe{&L8uC)3Vo^rkT&$b0*)Y4>Q}IjNt?XipL2-b04=lya_wC27@-dVP#4M^+iA zSKWKRW#<0LwQ^mzj|nQRl^K<`!PILw*Eb6mAfTIk!RpLa8!z+yV_uJKy(3yr`zU1 zDUaC~t6jxd1W}B`59E-PcweLgKD{5L8&`^9=bpcfBtxM4SGgo6-OF-n2 z4bMJyXxJgHF+Y$p`3bTP(;4{neTEYufHbh*`Qb!j2_yJ&+Hy2bc!he0CLJoAGV9{q zqBNnA1b@c+&N!HOQdEu<2mVykdA8y1UORuD!5D2@aP_p7kgrOjxb73*qF&SEob&&Y z^zWvyggX3QZdsM4ecbY zHdqAv#l2U=WT*j$;zCMrJ15dngAMvKO^T(Hi=y8=JBCuD(Vd6cp5v^m6`sowBVD4W zl6#I%v<2G}Sd~7m$*O3Ff#jTI6+?!sXVZzuC=C8YEps_P3o1GZ`VsQseYTpVt=oHk z$7{arMDD)ub$ZJrK~<8z91$Q%+4o2khy38Aa%#f!p}yG*D`n=OlO(#JKa%-9=YVJ^ zS81c9(qn4MCV$r4!xU0Q5$pp5ZVJj1pQSA9ALHme(~s^}Hu6orN0FWgTUc-((epvPeT zu;i=K+rvdj3}u0ymh1Hfqq=X;D!S#QQ$MK07n=ny@s-m4wGQ<#?lC@Y%y-LqA=AjXHDzHZ46**9z9i{M_&Y+Rws@~;BfHzc(Q-lXq@ZC%S}Dyt0CO=5_HUhc|O z*0u8KTp9tahClV^T%l43Akj20gX!w3Di#+_YXIKhM?;JnJ#G0`^l^CL5s6y&00Ai( z6wJ@B;d}pHa}jC%o1+kpU4W01``rC#u<-DiFh=_xBI_6^{bBM&)$d!27TryUO=LnT zQm2m^cZ3P2lMo{ihrP1cKW}}LfH9MQfFk3%|5A|kQh6$N4Le72ybxRAOVLZ!*bqie zS~1BQy0!+_B?ARjh@65Wv$FA)2-yAfKmM6cKUU_yOW^;7;JF*~8oU@-vaM?!iG}!6 zeyAz<88g{o)Z=gHHO0A7GzSyFH0&y*k#h-ICOdn)@IbwBpWPN?N2%oz^<{E2!qWF` zUE_j^aGUYF)GxV?UGSs?MNKZxPyHUHIKsN=9(^~j`pFuin!6fed!wX;`gi@~ROB7Y zy1E;ByD!#T^gRE*S@)7d1efRaW|199fzP`HNP4k%`nW$8Z-#{}M6nOoJ@;m-y4HHN zrl^c@o@Sh)D{do}9UzCm3{rnzb7;Z{r9Kmh^iEmv>nHLZ#n{rLHS=>jUHyZ5Lrf&` z;$AT%OJYQ>(|Vn#1SkJi#{}98C&*ZSNt^mkXOrq=bSjH6eoJNLV}GF3bEKTWkde64zM&*vb=DvlLt2Smcz-IumEI}} z(5yFGhsXvRsZ(_pbUSoe|JV~atP_$03%sF?A+Fn?>QQm4s@bM!bv*9-aSr5@>ypW% z-jw05RTK@S@y+CtVm7^;_W_}5<&V3!dUD+>$tSfEa?ewbdTKr-@G#F?Fp(H0oqbWI zReDN*G%SumS|v9AS+^|~n-QOl8kx-<*NA1r#PO_rSw)g%1FY$^$D3zq9Q|OO^fSRP79nh+>qc{9rfSkHADeet84)0sj-%-Zo1?+0sP@SQkB+0-sSz zw`>l89plu3l==6C?)EjAO2HBFgV2{_aK0f zs$3aw1p|icefN8t-nhJU8qEh4^adEjlXW)KqaJ7p*x`AC$)#HV2S=B~RXZrguDv=6 zpx#+$#e@aFh z23N6{i*qqIcy_6rvMgwD>vQErtO6!#=WfoMLJRa;KE`WUxvEk}3LMkj`;OzZ9q2@5Uc|Cj?NiRN!x}5l) zq;;%L*l1LoXc4xRiSBFI9?GHY7%8J6H2AlHOs6M(Bq@ zyn$D`Z$Vpa3p?yQl!Ss}eGaTxn6c4fsjcXAK)@ZHSJMo8l7AzevHb*?{o8e=UCjJQ z8qcAkszRhul;fY!4{arp)|{FDLH!w`Eo&14_PRjOp5J)xJRE)8Vd>E7q;=C01s_fe zMRF)<5 zC_Uw9ASkWx{D#jpjR93c?{%?(=qtfOb@*t z)|Lrx0;R2n+g+nF1n>Iz$H>(!=>bzb`GG(2Dq7BITOFQ(pA-3BA`cR$Mk&Zc5 z-lSWnMKxzO(wz0d^oiBD3n0_jn3iu+ul%yEnq|o$jZH-ywQYnqRSOf|n1j@x5>?(Q ze0K=o_*JwE@eP4$Z+D&Z?%Kj3&w%7BYL zCX229dqlln>YwXYdxkNfRRjI0RlurT26+4cRTE&(40-_L4hxA|nY4UAq}^V+20$Rb zVeSu?5_RA6m+>ip{<|E&TW(;z$Y*iy>j0~(5?URV0(~LnKaRQqB%%!?tE(U?Ae*A7 z`GMWg4*pk-eGzdKS6V7|Qt-{K`l`f4%xcA*iOv-fIF*|ia*H5dG}wK$5&+0F40fq& zjW!D1J~Z!2p{ZBoYN z&cOyVgtfF@mU&gO4Zk4fuql@>3V0h!nk%g+d$md4V<%G%^pyD|vO)t#;Rz`&iIDYf z^O2PuD#gSjxoUuKjb$JH@3IYHQu_C^oBefIf4t_}SAwGa_fAS1%pK&lvo{~D(x7Df zO4WuZc1UBSZ|JssyUBuYU%;T0sB5uJ`}HiZan{#0mAzkGR11sOTPM@zM1vEJb?=we z@GZqn+7W3n8I&NMPdldqT~qW8fjdv_i3GzQYJ|gjnC#o>6jv@TF~{+T;pve(*+rXG zICa$p_qWyn>l(OZqlMqbLl-9gD0x8r#7S_-$C5AmErGxF8xK|%f*A-q@m!R~lsrn( z3F$WSA?z9V#FV&&zI7XP9VrPEv^kwAg8hG~M5Tk)UWuiaK0Q=fe69w|B^-#e0-uX6 zD;pR@A6Zvwp9J7Q8_LHS*BjWfC?`lX##s5cLTgrmtB0d-`N>cd8OiH>GnKN*&%rd4 zC=_S{2+8oaH&Es>V6!K8y3uFweCG!Czf75^+$|pjha!4kQvJ3kw$zGZCZ^Q#XSM&* z-|%pI{OV+5Wz_?jVEdU2xxt|~u;V1%+eop|k+x-J;)Lz;RY{UE)?pK!qPmIamCv3la&3A0kU3{5n?_`ODyN0WG|aV8uOdSq^iHD` zU*%RJYV4^YQ>}N`bxwUbPI#Z%GDPX`=2a?CKGcdAah0&Ns8Oey;OhNH_P|JfW)^G z2Y|1|`KO|_7DOe~;xj1~+*h7p&R1;33&qEs-6 z)zkz+5#77L7-~9DBiAH|oCT%}b$v34hnJTYqFHt>rtngp8zK6ZZOZ6Y(9CBbx4Re) z0$F7*14>4cg_I5v_@Cf{-2;7Dh!ZgTr7 zZuw8yHAz+2wZcyr-4AfsIgL3{gTI}Wza9Ua7cjbg4s+RHd=Kio{<{z_=x%DwIB?*S zX+M^AI;_fwVAYr)H&@|9vdE1a-GZL(F&H|Wi^r-{m2IzPq~AO&-%3U+SvMJ>J7syp z?|%#yYhJQae#{DH`-Z-XzJGIo1NN_r%dAFSv+rgOV1yUW?K5++ANn4ML5O3Lfvbtq zFSxk21=D$&qrIStNP-S7`=y`Fh*+IpR&(D8?^3dnW$L-=tsqU*XqnEFp7k0@1@#@b z@^E`Mc^yxwGMG&>PKK~XI_Bc?Ib!?>*|q-o@Zu`_8dR-6$0I!nmEBMAWo(~Tp2N7K zX1o?4vW#8j-(sd&h`cKr5a%FyQp!nfq2t5bsC&c*#*?R@98A>Z7>hII#R?MFcQPR@ z?!_pNEZvzK1%v|W`W(z$z4Vj&N=6PbFD{kJVQcX8hc%iInbHV!e}?7Ws@+W;nFH=S z$$sg|9jP&B1k>K6hr}4SaeiBt$V9<)FPYVoC@CBjTqv|@EHiI_;bPQ64VK!VpMPO; z5dF(Oq1Z%+uBe(~sN=z(C1yEyG0-!!+&84Bm|q}`7WH&?%Sys`hjz{<`o;_*p}J?M~hd&l@=wsmMQ=tejwo{6YHD+f!aBD3;Mh?5`F z|D2Li0P3zP4OeE*C5D*TT7X4OdU4l@tS2~@cy)Dd0BxX9#MI&EH5#HpSy;`IOUy7(3KfF5VMKpqy>r0 z>6Tpg)yCE~2gwc+;XWkX;v$uP`R+pZ{sAN}qnHK)XdAp`kgi>u<~T6d7;zIhd`7$%f-l2E)K zFlq}$ITA2zOk&$iLG#EGPzl~81K5gfzGk7VsCC^WTn{cUxxLIJz1I;>eN@k(Z40@x zDdN^kGvwPDxmX}CgdXntQ?i_j5U<5}6DCbijD+?wd8>qC|Fb6gpO%vjzc83uP8+(J zjeZ}nQE+6BYM5GKZJq>tB~{i#8)8zt`;UZ~Odn4_dg%a_d|cw#*=ff?W!sq(A(t(+ zwQr7SUj~Y>VPzD&tH?wGSlNuQHSz!;Y@c|7d4W1E3cHhQQF!16edWRYdF~bqlf(|u z9qcnZD-T2Y*9NhqyDYWdcS+N-bkPbDdpJ}5KR-XWyB~1P(`TY#rH^oNExh9Qrm`- zcO^XnvQUz5V+p-U?3*r$SSVX=lqV4j*?g+QUh$SOC4lrCCpwd&Tyr_VTz$s~_nw_n zsx8?&@-1n8i*q@ZcRFqo-OM%GUfI3z4{=%smoIYfR3>vmiLfvIyYGxL#9Rf2MQwKd zfoE}fF#fcu9z_tL0)~Pwk<+it-5T-GfD(w+StT^Bk%YF7^6Z245yV5WIqD}YP8`qO zjR;IfDTF8*HgRwG1L#Fng|ycBSptQsXxFg5pEpQ&Z$P6)@j0A4GDO<-faNbQ&0P+R zHzd!4Is-=p%SNaxE!>b9K5o(?2%)LiLhk|A%1IWtYZ!h@MvIbb@)#!?k45I{7MSo< zxUpG6?|_ura}uaGU!AhR%Z$I_PlEe?L_a-Qyjl2p)aeOhNXyD~v0FHUq6th&@(5z>FI-^l)J$s`bzM(2cP^7CrCE{u^~-*D(As?l8-rBc>n?ugW% zGUsd+%YMtc00v>poaY)-iFM6LjMzj$%Ouerkk1hXIC1qA-+e(D3CGH+PIBRNUx}@{ zBo{xPCMt~Xvl;)1biiffUniQ8)W$F>(|7`|0XvvoVpq4EYukEXLA@mQO_Mv|tJi2L z+P4Oyw*kn?rMImmSAlq;u_ z#`nULNs$p<#tzMzT?Lyg`-&#^^DmvlACm))NCkP0%HMaGCv?j6N$(yAM(5JSwzpG* z$x`dM3|nC28_5*BzGFs~E#N&wWK{_q=2Z9((w64n*GVei@1bFCqUPc`M(J?3R8@J9Fy)N;j&YLTBbd+0QxaXncYjLwrNcGkyI zB}3fZmAup+!Z8wr8pL8L*LS46<_k0Y-=uGCeUbUbwbU0oQ$9|MEl1bg5{PgqrHUq6 zR^sVGdM(s^8VLYOs0nIrVAmlPqx|UZP=+nez9kBf^7HT2ABBpR=o8f}66k*V`#CQv zed{Q=khZ~5xy*d0tJ#7CZ^NuA_em9J&Gg`qGGZWgD^$USwyN5gro>2@{k&{ysRtEcmkk@wiE~{BF0#5%Mnha6LBc(>cwwN`Gvp{eADL*pPG1XybaWg|=q~W9o2?M( z%4$rEHbBZJV|EoLx5}o}3yxKOV>+n4!Y(KxX@Rqa757WBxQpo`RI_JybsN@k2&quHQLPLTJ%E6-+*a&FSF8E$}XvHr5+$uZFj?!t$i z%HCiFO>GeSpJx)rlpp9gSrLa0gF6W-H(c_8E~PcoJt(I@$U%ciewQj@jpk~37;-`41Uv+@`k0QrE{5KnkeKU%i#A#s4W-MMUz_jx zaa}oMu$qm|l6#Axd1rbkeJ!-fVwt6OL2}?$O;0)zP z9_@>SFcnq4z5XmqFcry5GpGpq4B=? zsF~xh%!bgGhPvdXELO$+KhiO@b0MewTRlRKT3_$I)sSAg$s~wc2PE=b)Dwg*52D9G zG02Bp^zJEkkMhG$Y|po{G0dP!pp{_+Y0mFmn@cL9d!zmMlMq=$W6ZiWON{Wjw%&*F z64bP)pd>D=U$AqA*{z`Cu-u~cHLDfA9qa?a2z~n34ZCPNEJyW(Yl9;r-CSMAF&+Hw zp3an_1D>2|w`h6;#WLYvRBSdB^>Z78N3omMID=7XBQY15yB5i85%Wg@psbs+$Ln7s z7@JKtd!#DmNC3Sy=TXorAY9s74y&O(=gq#9-&r8Afvgp~ebCl%U?=Vym5`~N*)SXz zYE_^HdHdYw{6^9F54%dUHR|HRuoCB)$mcz;O`p=DgbD8`yTX87b1ea(bFTDtYktEJ zAGu+!UQ4Qy#FXg}ZK4>e+eN=z4Qka>jumd%z`4UP0FC&gX-hnV~g;A8&TDD4JOpv6_+D+4MGT8}yyXs(pWX zJZZqQexI$?Y@ltSM7<@VZI(SCK;>FvnaFOQ^bSa~(#q&~D;Fme|Ei2Aqca~nbN%|D zTr-YFHk9E=RGy##lZBHqrIlb;_QJ4NjjM>@$T)5h;AidV(*iQajyNm*W;m_d%TX%0(%arJfI^>g9QI?y!9a9^og)vI4@Nok)+d`mH)Mj?9 zp}V#|#4-acft(R(2T-OB{LEx#g{0aT9m{J#DtY@9=acsvz&Bad?7~A|CizUQFtm$Q zd%{j|6v9wM$cT@6@L!sFA+iY`NkKiy{k=Of%0_58P|epAod0sLWwG?JMSNK1jx)w~_(yJX zitNNdseId+vovOd0?qYii}jq!=_=1e>^7%TU5)#n!gSlx7^;d-Pmy)}=NtXKD3Ow& zmS>-nE=b3CSl{r8PBB`AIQ;cjRtwXNKw2WSVg=&?goM1c9?T2E8$`W%zB0R*T|n|h zwPv+g0R}$+jN$$F#8${K0gK-|B^;c?PbSl8kf=678Mw2xkGtn?;X*OGfjOkwL zb$rlO_&BfYS-c?+XhB5yvZ9HW>(ULB0;n9G`HSF zO!1r^1dS8mt6)mP%+YHz-@K}u(wvw-k~hK1{&FDPa1qF?K=(VkztiI zbb-l~M#!Gb8RDLEkw_p-EA}<~5A-VYoE4T2`3|bVXtgnSe{y$wMbH2LG?``ahRO0O zDup=&76-lDU8-SGht4ZWQDw;ceO7TzTu7a9XL)8!>C(-EfXQWMBz_h-tys&M7b>+= zRIEXp8XP-*Gvw}c+?_xwTmtK^LVuM7tGMp#Pz;YYN`SB-~9ZY|QdaXn~ZYPx_wKT~{ICu31;`+O}QG z=)wuvy$75Xsh2xP^u8VbAkwf=g<{HZ*~zWTaH^TfnJ^wgm+2nwr91 zQg`dLq`Q>mfCgkA_0LOfycDdHB`wJbZN+~UGy(3|DN$mbmD25Ubi#+k`@>_AUHvS? zE4K+6u8I-~9HZ6#eN2uHz;cb>_q!|U?B(8JiRQRTJ7b9f{->hsyOC z=`fm8?@}J4T6WpDgZNVrPKMAOc3KqqIzm?Uu+Y|t$^Gj!VHy~b5k8iSN>4DYYG#K? z`4s1kfhC(8BNp*ucr3scL$%X;EvROnt5bVrIGay4$JywYmk~*AIe#ixe{*f}+V)Y} zML@oOjK41=1RRo1+ZP@>bM?{zH##)!^>~OJsGu{D?fy*;k}aIR@-e9?;6|w}AJ^z) z;6o6zkZ$7o;Yeh=lynj!J*hXRUTUSH`sU&yI> zDELSsbsAEDQkoslp*s#r<$YuIzWpVV$pkdY?Ui#j?>6MiBV*tT#%zxL=N2B*tD97Q zW9t!|&b8nEkjm(cLhR$cnX|01fwP--rPwG-5nZ-)kZ&c`Ih8FwC=wya3NSvL(6px{ zja#jQ8ckaH_2@3)9+O-&vTe~s=ABB5=I~c2D&85x#qBxIg8)u)i^K8a&LwYX)s}f} zet_jvi%Dz{v4%&*|kM1 zC8MWmA6IHuSn2QBFGMUS7jfG=F1=T7z_J|bIqk1I5nZ+C2}D3?_7&Cw>nQz*=8&R7 z1I3dP_@}N6s&b6=X*I|rs^oUr)w^ekSlp{QvJITDswdY~d6k{UYi*F9Ugj=9z9q-j zCpicKk4j01B0xr_TgJT-^H8i2gI+8zEJY@|r%uWRznz-wS(h-Q(=T#~^I62wM{s7U zdHWb(X`Gt~0#lXoX$7~m^ZAnxo%~$e5K#$7v*lPSv zd`?E<1OAH9<*wxxE3VCe+yjNDLXWzPn6}2O)H%C+H!GIDFH82=`JFjA^}$d=H(FkY zZQr>4qzCd~>PC3WN?qxqMxBb%1aaFdt^n&dr5lxp)=-(0^L<|PRCe%wco)q*VJayu zjj+K)uC}&s_PFQ^gPJY#-`Vm)JcVh=90HWS*SrB&jt~3sI@?k$oRdJ%*?LXKt(l=v zJoEbxj=1JsN^^6&>$Oyavf{s{evspUlZLAQgr-}n9%FeK-AUFx0-GDyyRM|`dFmKM z@LUG>*prPx$K4Jq3WTaak~UC9yBnQ?+5ub6YQbqM(2mh7q&mdFpb0K?%xx#;m?$Nc zFT)N&V}@#~6sf0KlEun87b1bAWTzv>=fT|QKj3TsOv{@IM}0fre~xhyey^V8tR79X ze1kS%1F@sP5hi-Lk0szn=>0*^bk~saoMK7eX*wmSkII^nENl%xcXZS z97dm$(~vt@EOUIGjmLrp(HI{VK^rTm(C`DN-Bj6X#CGLS z%4}C(83Jha{chw$WU-G_rfvWV4A+ahw;<2>dZ{B!Rj1wSRib#g6yoAwZ1cI=fh29+ zol^j2I!rPFeW&*lW1~^kPLzJ==#mY(&_bjW+*N6=EAf@_bHnBI(}WB%o9yx#<8)%J zrAeFM^rMlZ?4LHH8bbbMLp0Q*H}yj*7)K6+;d7XOoL6tE<$WJppny`A z4+ucQwcr0QXv)lOFtj(0KNZN9OT5(OJcdH5`R zO*p?K*Wz1#dMgxx z$ug7yut`VBH92y!;+h@D3FT)1(};jrAk6K%rOl33v((0_kVw6k#wO~m+*{>m!lUZyO2fh%6^q2R8bANo88jzr-bCOxc+D-g$AVbtqqZUDviLL12$#S#Ph z$YAV3us66zs;zXZ5$&=7(NeDAqH@tX76%7bj*Fv7?x9diG|sQzHsoQW1}I{XA^;8R z$ir~YMs`{~1Syo2aUvzdQr{!=In@#%W+FkG`s%Q>8dv{Qt)Hc^Yj|wi%~_F5!9RmY zEzFvrS#aZ()RH>H3(+cN*1Mh!PEi_$Z%}c)hQqJ%Mo%>y zG#><%V*MLKvbUlL02%wOY-wmGE59%kIf1lrCDDeHuag8e!xT07Ed{wsVWCc> zP>U)a^OQ`xGoj#$xzic>%dXWL(F^`v+;>q5la`yHF%XvKeyIsO{`BY|Z9{};f&3DN zF1U8TZTU9yn8pl^#ug<^&a@0DPpsj=u=a+eAG}FN3qg#bebZC%m*yd{P_SMIYl@*V z4vYWUSak<%mQ-qs>*%x|%CZchsBz$mAtuAT@*CX`v4P$*lpRfAfm~G%IPOe@FcWf7 zSUEDODM11L@hG2A?64PLJ*vfm;mlU6;gQK+ON5l}oN8>6r2!XAsy4Jl=zSs_o;|rO z;rS4|G9E_jZ}Ayw>&z@RiZtlgT+r1#^1&CI$wiMjmusqNK(P;tVe}M%X}~2Fnj9!_ zVE!oRe@PMP|F6`JgN#6ITqBc!L>F|CII5qqdq*Ze?;*V&nL*LGMG!7U?8A$@aAHQK zmi_pA=^7yeFMji-L5c<7Fh0bW8!jVf(8Tb&dmWw5yT?rx8FMszk+LnPR9gr7qk1L$ zqS`JuJnTm+&={dr-(@a?{4Dgn_zNk}xm-|c!Z3U9d6yCNZ~_j&lW&u4Yg4?1R~WE1 zXtr{yH?qsef=3O$ob%04%?K#T;0t&_$`#DG{4fGdx+m@BN;=P%S=oo&{F<$lrgd0J z4-$zZFGCp7)pCv@6$l{V=P#F6`ao%-Pw?!6%=*H&g5yM`NWmOc#zT*QaSU70 zOO>StLG>PFq`&R?Uw{cAZj`)kc$C-vr-uhRYb4TsG>>hz0#SCAuOpv@FdFWLEAZ|R zbb26#72u&iq6n);IVeyNLfhmYhj)#p6JXLYd_qV$=%{#w(RS^x0 zQDyBMp)eR`BwkN120f->qLC5VB$JXURsuykJSa-njml`8*HZNqGDS0xJ^zdIoKg1- z;iAz8f&BA29zwvaU(hy)*P#YFbxCIq(9RXL*CS4gbcu3(s-qi#G{y}Y*NZ~3HwylC z6Gcb%+qRipEMUyKw|+s#D9`}RQ#JtD z@hR_VxW~mZrI%MX!y|xBKc2SN)2GH;Q0ZwJh+|CH5 znGeg$d_f3A^|$Ye)#tj@l(qukSayT4w3K*DtOnuBKx=xT!h>xmST$kWTAXcOP*BLH zn=i!fvNS&0`5^e4KEyjnD{zo39&sQ>Zt!B^o0PA6g`p#`u_1*r<3{yu+W$U?3gpDBvY8e3( z`&}B(>93{Fj-zMuv}WI|w1v&di%2yNjo8s162I#Cs_&GPLJq3!qAh7olQh2$RJk%^^+dr|-+kRYCm7b`+8&|) z4C%q_<6Ysy%&j~y`6dH}{4{>c(g#*KHOBSP$GJ0)39CNZ*ce8BbzFAvjfBO^JmX!8 z<#BtH;jJlC@n!N#Yl~83&_;25jF#aIyV$hnw|ptt3?^;e2+WzQKQ_!VqS6WBZ`a@g zfeH}Cxj7pQwE*0rMvelq5!uAjy4T3TLS5hEJ{VKdaWwex&f|1UlKUVeYdgByvRRJv zieIOvgi)LG$S|7_OyQgdQRb41@SW&(800f!=EUz8ow_x_{r_o*uO7wx7I{C%BhE%B zPZ`4D@5_XZZQ7omQg&d`_aDUERp+!le~i886wa8Hh8BeRVYD=2>tC%T#h1hC{K2ue zoci=s6q0rW-TML0?_f}pJrv~P33}}xACae-_=_kr*@?a_d2$Q%YRHndhT)O$X$viP ze%z!o$5@R1vP=hzC}2ZTt-5#1 zzRT>K%DN2`rbNZiTn;&S(>C?Xk=ZcWqaUT3iXL#dTKl-@eDpI*2J}e8Isa-lE~my@ za7%sT;&c|`^g>u0{K{RSQQx>Ef&*>a6SwXXjBntFSNebZ^-C1jK>6FbSM2oE0Pt1H z;x4u`#;7iY22>5?@OjBJDw^a$x&Dk>f(O1s_bpTw_GZQ$C`}&-2l^{M-6C_TnQAP> z(mkF`K&6@@JhfmtEc*@BCc*as7)ENFC`K#lsRCWj9}5L}#`HB5zgtGpDUyPf4bgIw z*6f*%pRXc16=jF=A>BNWgoOZ58iUdWx{m3;PFfdBN+3F_LMuq@zbN5Mg?dSDm zFxR(OM0nH*e^!Uw}{EAa5?^;k~c|v_u=9c`qi0 zk@A$#e6sgSpUWw31vCDTnsiogDi1?dTi0XS<(rY|53w;*4}@3Uj>SL4!d)|BT5$DY z)!a`P$<~q~Y;AB^773~uz-x7Z{cK2|W3?p?jkX|#_{&k4%V?+#u}DuQR$7eKsIQa- z?2fr{v7aU3QgZd^T067|pk=)m0ftRMrFBB^h+0COVoZRd(agw2mRc{yN;`$TTqQq)=lpn~R&f zAwIX1u%O$-jRH{q`y@t3t;#en_bU|koVsbaH`FtTWm6Qfn+jxMN=4J zL^~na4s!)}x5KTJR;ke@i*WL~T1hZPz%`*BF(M>DoLTo2(4KNTe-tIw{l6C*pk%F* z2P`&4+WvXn6Z-=3FsJrYmk7v0i(sW)PG9&zhVa1IdI_p0zdT;_gozRlke2!&%kz~s zFLNn>j4#VeWL8T- zHM39&Lckqvl|!*<0WBoznUS|j3iB0OP|Q$Lo`|NH?=FIRSw(f|7WT}hV!tB(tRzZk ziBWiFb9AIC3kG?ATk&bj2EG^{qc-fkN{BXv1FwQq?e|m5mi^=gpE~Cm{A!{+GRz;CRQj4mxDDP#&BTZ9NPp1_igSeRQxmL1?O=Ur7iJ0)Lne@1e?AW%L~oMJL1uWb;*Y`EwI1ugxvj#zQ8PA2Vjc- z6aw*^=Uy`h2h7@h1Wg6@7u?5AXd5xGx5~PX$MXt`oiMZbf+Ei%j2v`jm%adOZc5Va zLk#9qjRC_l%$>62>U7}|B{*9VnXL~Z zf-=w1qqv0;n=iEF1uZI)>A&>i+|7VUjRe|KL=v;z`qu{iR!4Mw|9zmiwt|S*q>?z? zwx;8n50^~IVo~S9UncZ9A2(23l_1>^)aT3I=rWX1fgTiiF_k4KfEf?I>E@Ze=20omF7XM5tmU0k`rX5aL@ODQZoATl&~r zb-ra-^$;LS*@1J}4eg%##kuR9Lo$;d6G<&6j&%#;MbCzu9WfZ(t)M0S^Z~!XDX-Kb zcvX=$J#hFk9I4w)%+7onu^xOJUC&cIlK0Jew_(4+e%_xH&kR^-;3i8l8* zm$wqTCm&+=Ml;mFHu>h0OpTQhzar_PeW-=YX^i@IO9-i%*{IGr6_C#ykS$|wAQG;T z*e3PD2>e57o@f|LDI^%coa|2CdhMNCgxXz5MlkPXmA%0<)grc{%*nLM&5SLMfGT1_ zUIahIiigE1ZbJiQ$xPd6$NWw+*n}1NIkt3RtJHj5Z>AA;5K;B7o9XjXE8oLDTKQ-} z1@C+zkH9Ke`tn25d2(2oFM07#h(Y;0%+TWp&|*?reGgciP%_>%x`(f=R`TBW>%X`f zjKxo}V>o*j{_j)xs<@IIg27B7^)oVp-?;2yB&k+WoTx%-IJf?oz4QF>gZAeb=$V>^ zeZC<|HRB*)*}6j>q;ha8H@Co5ou;z}UXp}_EYdfdo@i-Fv&8G$Mf;KGI9h=^BFCs- zM7*V{GqMOf+NqlyY&dMTkdqF34R?{ZkCDmK3mu)kr!!5HJ%Z#*98hedC`B|$_HFN| ztIOI71*)JR@^?7yIBfTNjNx_HvAgbR(+hjQBKIL5gVu{iXR$$0V(oH3R>8Z@S_5Mn4Z5FVA=Kx zXL8Q~Ux^(Ob9do_`-EZA98jq!q5iQt(y(Zr+u5umm<8BjS$+H~ABCl4*+ChEjfA9# z1Y5!Xb#OAXuUZB+TC4uHn?PhV%U2hU=i=~NyE;k5MJ`D#vI0+w)9CJBGsOJ&~Xt-?6f(EDWij_)Q ze&koxjc7F4F774KAaR*>)@X4ZEc%Ko7fd!?q?-6Er7+>GI1n%)T?G{<+D<#w;!rTl~RAD^0RqI5aTac81yZP4^sxg;{#Dqa+V8@=(|3TXuX}_I`!68AF-_h z^kidOH+_!w?KUb)eU5oexg!YsXidO5#0-_Lk{EhZX~!STS1N3MDSt>1)M(XS))uz* zKC9UhSR3>U#hKp-WZE$bU;tWcA(S+r;0mF(#dWyjManRA{rRm;~&y^cpoO;&y zBYWZ7@qp#ubw{|8%P_<;-pNr-z?AwOfT$|s9Z~XH97Jj zScTfjAnZ9AsQ{ECUmZ*Sp2Sx%T_^NXy}&&$6ZV>}5&#Z12;t8xrrfsu!fp^5#J&t$ zC@yUe%kw^<70%ff3AEgW0vmU94V5%yGtczbr!gqkL`z#`uyG)|09U8?pjI-m#O-z; zJt5fk8k{H#tShf=|Ej7SOmo?PmVIzCpHZz-uHhT6C05$-QWn+C9~KF(nHfLL0UF(l z=Vh&6umm(Qtcr)hH@yJj9FKl}#d>1;4-35B7u%&qwtT=|9l@6C!(U0bsU?=zd-+o$ z#NK>`rOOIW-hAzfy@Yg>g^4Vy1m8r#!AG1w#mW8nB}T<;&N z=qiQ#S(;ra4TPo2!5K&J=-MqNYxN$-|0Yr_qri42&k|_~1(>!qeKDa0Ixc(m^|HSL z?+q-V^8(NeYBPoqSbe-#luPprKequw{h(06m_dbe|M*&NbE&vo<#XQ7auR6L6#35& zsaBJ(<=c3EKKmg>V%iYb_^Oo{F&PDSYg?2WtqLMtJLCMvQw^Ff7qaO-%c@|z>fVDk zufk>MkOJjZlj-L^)H)et?QPg}zA{LF2ZVJgkgk{xQ{(X8HV#D)Lcrxp7#yN(c3aiU zSg+NyOmkS&ndhj@bG>F^vWI>^Yuoa6Z9@U*&E>TRfX8BW+$namZ6?!ucXgyj6o|^v zN>eA3QeGTo&}WkK1j1csxYajKpeonxxj42~!Xh5|@N9vjfWmCq%2u%QB&<~mx)jQ4 zMPsKa^ZCrj5#GKn8RuDCD?cs)SUw?_nxGG6C|mta&>E?F(ao&H_dlza*Xd|du@}x^ z3^4YQ(e5lbWTe^y!^jp5D(XRD^Xl*;A%sOPtsUtu<#DdXeBh<3DKO&r>I@_(Gl{8HfGo{6>aHUdfnDX~ZMAn( zF=zO%pvTVEJgsl>Dw30+gaqiEcOzE^IjSL%Bv!wTO%MZEb0I4QgY6Nf;>IJlKB6J) zEyvby<<;$v)0h~jq1eCWAwWexM)Tv6E=nKi4eS<>g+;UwvR$A`G6*3^6SPNE<kN) z()oG`T`UeVDSvbVb4boK%Zy+H(9UFN`AwWL0Ppaf{_q%{;YSK%dWLuY@a8DT!utv; zQPSS1a~S89?CcYtG0HE)wet$cbBZRD2Sx8{70Y+{NW0BZRC$-`cQJy81ltoYPsr>Ov^}sjuf?T>I`Qb;fa8`M%Cs`a%iA!i zgjvVdosrw^rw&~TMii*Cy5!ArW44HH29@)D8x(w1?s%C0Bjo~EnF3DI6$x^UWxOB4 zf7Dfa?0N6GgQO{96cNFQC>D4|tI|+zD8raAf3k*kPG_2DH6QY zU}ctJ?3TA1+`x23qt3@LafqT$;I+e$G`ii9b3v7)&_5Xy83gb~vLRM)>(F7jl(~cg z6U7dM<+ zlwNIx%jFGq^}@%!#iwz_=#O85xcPxpB3bRr`o5wAHUV-&nJ19(Aln)BhPdgz{a1K7 zKoQF_FdI-4*=F9XEMha^M?W{1yJS^H^cD@6rr#aI8tkO#zY!fUufu8xDB=9pHZS-k zxJTx}xE;{6fz8WZ<6MG?_#C>pdQSfTBhkb~I+S;CZN{|RO;;J9F{k--wBTq*p;Mm~jpil1X{0~#$`MD~kOOK(P9`9L$qX0T0MSTchSx-Qcn`gz)@t^y1KbtIeNv>Q)kMMkUPnNRM|B`0}x6O24q` zqUT(0`jK*bz6m#u!o;ufCMXQbZ%$uH4Ps^E|K<7)#DjQawM%qv<8Kudt^m z0{-F$WHmur3bQk?L`3W z^-$hQC{M(L6&czlXcZ=kQp{NPe zF$vr48~^k~L;v}LnQ2qt)8xhfa-{e{aoSCuY4mDiU_=R8Vr#_N#Y1HP>A_k28t|Le z7DA@SrBzGS%Mp(O?ZpBD;F>r?M~Tn{k@8ja2~e8iBaXKe_=|#sOGn2XNMjZU5uyM^ zC^G)0u3A}giZFpr**+7xd+z^CsP|Wst2^9bob*sK=mL|yOZ>4uVEGe$0*vNgRx$XjQ3 zUNDo+2tv*_d>HAzjeX9xtOxZAc+_(|+f$Q@)2Z~vnn9Sb=Y%jf=;;3R^3LaE4rc8e^|#c znVHxYm`Q9Nr>=3X8&9~7)8Nt6`7VPQ9e2~=cvw9nL**EW7>v$ucFT1V$dj3_>W^Kq z23*t&)pp~E$4T5t4NCoSOCr27b-KRvEKsu=zIRr(?VaCZEUuhqM^@2?zCro}De(aK zu4X-|YXp;QV9l$hn_^WNu4i}CadqPmZ#rO-w{F+JSTk-KSh5b08!t(rok#cyuL>25 z!0UHe6xbjSI;{}P>S$~)rTemU*Gt27XC8(1hRxmYHpf_Xyw4VD6Azu?I%RpZUhj(tlKZk)qw?OLL2Hr;o!$ zgb+v5Hy(a8zm`T`cO|wHjL>73#mIH^|6G;)pZ$=Xe*x z4m(~+R;NW9M`lx3$=&%b&dZaQJA`;H6*VU2#A`=P|K<&720OF>*v#S0B1hxXi{GQr zCLzEO$CPvklL4k#G@!sttc_ThzTA}QM*hq2_Q*a1xT#Mzs76};0>Ggvr|iq72y*Enw~{qOU}ulIv$rCXFp37+ z@PDbzy2dkQxYrRi3;vLo9Cij;>s-6!+I$$tHX@ajutMIc!*6JkTLoHI9cn5u@UBQz zZNf}eLx~#VJ2F`$HQwfz6P2%yJpZCss41i9ML{e|#oqHjwqG@w!;$ z)w6ZW-jnbhtAu3o#E49Mi)ehyiOdYI=qtE;8dE%#@X`n&>g?}3>#57llrjoLo4zIo z`6==us!wsUze+NCL~v2o+XU4(3-NF}JU_$tKdmx#7q{a^I3aj$aTsM=o;{a2t`S`7#0A()T>`}K zt6P`4=sHaHG@cU+?7Xu~nG#K9Kjz;hCWCOt;3A{ck_Mywz(yZ*pmp-$RJmUpP57RV zg@&&r(l?M0I{u)c%MXM6tmj1DLI=a<&pXXR%SpHVcGQEY1;)U9QJmhpu)4 z1cGxuet}-Gf@Py-KS$^+U!oE&BImmI4EU7&bUQ6~%2&F-a-lnfmMYJUgB-h--ylHf+4a?brhb`TTJkC_Au z`rc9#(^S@pZDLR!?-56R=27P@XF)|)<2AC7#|No{~5MDK(KYf@QuQ(Imj=AW3kx7ZY z4J=4Yg0Wqs8!3`UXppc1G z9j4-LB#wWQqf~t;2DAiUwENJ__}iP$eDy(5)PrpWIT?x|9@cHo5l4mcoS+J>I4hpn z4E=6+&1jlsE%>uGAMI4j7fofF93uNiUC(QBXpHQ1j67^)}T~HR>4|Pc; z1*0b}?T_|)g#j&I3q!WZM3!kDqI;q9KjByZIxhnBj;h>NtnEAxdtWj|w15ZeN-&8% zu1IZR!wd8mmt-b5Hn1y+@!z@GUADOad9r@gLsnh#n>_2RqEL)jH!FQWWmQ;pmWCpC z6Y8x88m>rI1OT!HxM|q3ixnhS`zX%E8jxvMwq1=pLDt*?B>NSs?>n!p8L!aqV z9Njen;Tq?r;M`4BBoEqCm+&@K)rfD9U&4M#4q9HShV;Mo<8Vm?IR|R?r>PtaB4ac= zzrLkWZ`2dA2>l%wx-dTyj_wAK*t%#udKb)*%+N;OWQJABr=o{e3-ePyFcbIGcN#0o zn9wxahA>HhuffiiL+7{ZoE`e%n3W(uEqZ`14VR#~WoE6cDugOf%G@^42k7cbdmF6_ zHwYnMv@J)4Fs~D#P>vQHIylEI$EcekbigBT3M>o6!jIRo*CqMVc8zg0(ys+yysPF& zU4Osy;jV9xdLO?%{r4E!RFojHc-5aG=fOa!-!juVJ+e`N#!Q;amD4U{V|R;czZOPj zsiBPN_H8CnTJ{TrY_OXwHayvI4udLVFNO?@m@o(^LhDSm+NE+K5kRrb;kGR+6yXhS z?)f#?gL)*8#FTYb_3K61^zBe$n!urA7ajN#b1>&HBJaz+mP|2@)3nu zIHKp*>zHCtgLTDttMTKd^$KcE1O9F~5|pE-+q!DOj;VIsEE@dU(Om$^e zX7OUJLiueB!wn$DO4#Rjwt(~N#ejs5=I6uNOcLR+YPjNDQJeq5q#BZR0vsT0p}=Rr zBqw=#TKzRn-MJ#V#>%2>v!E=;I7CW>+Chq_@KLF2{6;HF^j&pK9uh-BcJpNP^L#<# zc0K`w+_d%ZzqB;~aZ1GXS0#YoUS(*rjiK1DRLI@|KICMF{GG~}QO>h(I4I4CRhcER z%`SVnHP(p^C)k%6Ng64y-REmu*w}=)(%TTJ8BR*4Q}PELaijmSyjTz+Opkq$32&%au|{GB{C$BWWokUf{T-D6^W1M+6s0ChRwCCkp{@#~J-lTPHSzZ4 zDC8_Hy}K{kCq+86B`#;Ae8a-*NFf^>cz0!;GhVrKjMG|}TzHEyDY?oLlyWk})aLC! zB?m@SUj-dQ~x=hp?ZtIEF$DkqDGNP$A>Ld=sOw; z7d|rl{GMqlq!Vsj3k>+lM}TnBDU?$DdrD@G6tFq%-%~_lnBV=Zr@9R>hAAF1Vsw`q zE@bVuvS{^d9m=xRBBJsUNsgqZELKiOR-$KD)I$^WwdVFyciu}gN^NSbxD(ph;{*jB zf?G(Uo#l!JOWA(*J91{*Ru3>W)Vgq5y z;99iet?x|q0`S`GGRJyX50F#-^bF9_WfSV8A|&w}$-9OY1wA6HKmEycr+Ia_}m z5xoy+q+>4NZ##1vFF+?Nyb>!-Lr%;n*vxheK1FibpbIJ^F+ZD;#1BOVTiAP>T1dVV zf_VoaJI49qYUucvf4*(|<4G?D=YT`xHbG_K`PS+ye7Z7&D-!WcL~@vC;$#%fXeT$7 zCCa%k>xFlD@yDSG+H2&B$BlaU12AO&=y>GV<}DtTcR|WiC6oYih&veJn>C}EoEU<> z1WAh_SCztkR@=&8U|W{onJR7+`B}EMAiI!-5tZNg{KZ47!) zX6Q9MS3nBBKrqD!goI3-IE}Pzdqz;nDp;?%Rp2m?_mMWpi*I#fLbbEB1`vc-U-{b~ zPRS-OrqSW*+i!CM495Hfngs{}mGgV-KN+S7RZ;b3I0`cutqvJ5G5x=#hkaBKe3N$; z_T*mSgU>u9V_jbrQs}6;DBMd?K2ZB1hc9jL>3c7bJX4d*cssE$AQi-jRq;59p2Qp1 zr4luS18l#FffDM1adBC09wq->NJ}!QhF)_s(yo9}&woNKmtXOYqwR1`D-+}vrQ$*x zcPiB>3dnM%Npk{nM6#RsJ_jujVj2j$Cg3%=-xf<&gyPtL+HWI-H9@?ul@k&p+o%H{u#Epw?0;HIPT5yzit5fJo5g zZc||(TeM?xI`%A0egxFsX`$ciT)^LWzpAO`0mk0i07q@yVsu1%1>xR4MgSnoJO#+1>-94+3qt> zKxaS89n(6V(*dqZc(xjZh{xfCx4)U@4N&dpnc71f7cIT|7I4#aq~kmNN;*Kw4hK^{ z-DuhKVcWO;*{do{7tA(JRIy-6czzC~B0MQJtY7Mx(8F(an^LR;Ht52Bw4B?PF9`bv z8v=DPGkLt2!Z{EN6EAh)fd#KL^gpCg_fKh2Cd9qHCOKCuKYY5iNnE_HCYn_?!ZGeV zvm3{!(?}o$={tVXg90(d4b^(z4a4TWDT}IRBZ+1p?Lg>IbW!xh-&LUtifXl08P9pz ztgJ;vYaE~@_d@}beZ8CgjGW2chkaeY=BnmDQe?#2j9XT+#CS2MXwr*LWrKwR?ioJ0 z;8G7Xly8?;9#k&6CNE#gI@El{^4}_RfTwWaq&FnkcDyVTi9HzN{=sW$g`>3mnfaA} zHX=Mglkw#D`*(m#$a>J%Yt!5K(z&M>?+P00gW=ty*b*oi%glI;g<}^9v`)x5xQKlh z*)&0B;EbeAPp?lnZUIp^`aQCx*W6q4oeY+bx;pX>h?=4iQ_3Urx(_ES_dk9kb2ouTVER7p>H7HgGO%CZ?vL;7O1D95!h>{J9EfW6( zf!K#S`hoE0T5H#&Pf@#-oQnZr>xA(q>gT7&;cc%OTCFTLs-`5$>tf<@?|3C`I95K~_nPj2SRvtEMLM@%h{O_i&US=OaCwWmg@tLJYjZ|M+sr!lLW4%F*+r&?p5U^?ZcQQbw zBSCw3E-X~We?`Edl9vJ#>R~G9 qrmTCG=Y-e*0001Iq$CUg0rrmsw&NeQvBHHuFb#_W000000a;oY|L{Tp literal 69068 zcmV(pK=8l)H+ooF0004LBHlIv03iV!0000G&sfaoDXH)OT>vQ&2UKVgRpfkl* zs)A-(1?mrGpRD!ZGl(jIKV0o498AZQJr5ThUTo6G?CiTsN^@9Pn{Z4z+$o9g9m4$3ttE zj(>y6)vM4N{K~4@i*|74svXGSp4GB&zC{#IhlXRO$5Zmvk7S+AXasVgv1SRjZ?=Ec z+B}sXrtE{$^P^cxbgdxp9SK-vn;*uOO`9c4%8?csI)XYMFsHWUHtOBM=yFqrcTZmz5cBB*d4> zKY|~x!efGdWC??VLB(5g&V-cZF08PQm&mbsJmbO8IKPh8z7;x9$%(96liM1cOhHVQ zG0$QWgWDVHgx8Q4>m^rWpr2$&_Uj8j3Q$w8%x^If1JOYl@d8EehQUBG1QN!q&xcRD= zE(bEBIn<>bS2I+qt#7;#+*?>!Agd+DkLc2YgPohuEUj@GIO-yde%3BA=K1x9(mQU7 z)It3D2dr81A2+HP-@7>0cehp>4@U;Pn=vuOzqdtH-&c2A<1xeLb4JkVw3(D7;9ILj zui>%4i?L`svXgdVxlpf(!{ut7vJNj*W3Uj&9yk7V8c%=p;-n>{|IF?52*~8cYcmV3 zz)|JO4B^*)-~3cLZA-*;h;{hn!L&gABCA+GfW@_=e$q9Lc!Vr(f|f!dQXk?c*p-byAGnY_2N71>o2IErB~Z?jI${S7X=Fd>9+c3!4&6-J@S313n8GM z)yEzaqr6o(t}YQz7Q~VWG+l0VNf3LM-%%nGWl4HzMiRGI!@Lv$ zH)X@kDj$kT$S|lyX4KG1NfxB)AI%b{JL>mRdoWGic%k^nC1BxmC3m&D$sY^n7)7@r zLZY(?0-xh8jUFW@;2QH`r`<8DMn#id#`tX|#m{BPamGhac}qs(b>B#W&mN-5#@Q5G z3W$&>0l_;+LU7ZcMamJIbnLVy_CS`&&2+}YQlx260h!bSEj>tN0C8;wr7y+`d)cuM zfso~Q4W&jHN|hgg`a|_~MVd8T41I%~ZzBy~;_7;gA*g@xi{|Q6ZHs*j{O8ac#RFz5 zx)%(dESN`7a7n{+YWFLS`K{&GL)9RH5R5%u_Bw#M zgRZjF<|oPYEx?6hb{bayFeG)^4h3X~FC{X2*Z0`lc~zu%QJ;G`_9tq3I0fuIG4V3q(bNGVa?>S*gPl;t zP>f{+A9>e86PK;(%edRsK_s1)yGA*r3nEUOR8__duIC*8N^W6+&ewBtMlZfT2w%1c zyKPj;Q1#+iD9+51jsjuA5RK znu2ePQqPjW!bI1=4!P^|Au_jh!Sk~C=@{TnAH)g*hWZQ_A5dhINM=1G> z-@%Kuu0omBLcm$2!U8zWeO`Bu3p^o zb8P~>-uiI&G6h$Fbq)F^jTE3#jpiSFcc;<1*~-eVuFu}UuaP>%-yMf&^ok0*Kaqm< z>{KB-lLai+le^#Kd9w3jXwn>j%}Es3g+mZ^Yfbc0k6I+UvGT> ziJA&NsZ1cf*uY{5fT$svT9y~my*3l!akSe#O|6}Bdidq(`9GzquK8)Q8=KVpRC^!gV&HlaxPD!02l3pFeFO8!IgO#9cH|{P?Q3wo zt)@gn58KbJCK6O2S1yJU+gApy7^VV z$}2U))d28mWSacDf-}71K?=vHLMvi!ER0U_YlaX+Q%ss#x8!3UkpC*FG@i%sjbXdl z5t3gQRA0sBz>R*l;a8AX%d-Gw0lMx;Mbt)~3y&j85Fmox4XWyD0x480iFg;VrfY_4 zKpseJ=@M6f#VZ7`A%f>2P!=%tE64TbJkd-n(+QrUN4@@t4%g&`Jgfbu%i7%HP5))) zx7sPYSyi{TNOpBj$i?NCU5__+>Jg&Opa1^ifv|5!$S)T@t;~|K_g;^23_ylt4!eQ# zKDS=DJVLNM|H*3-MsU)(on&27QOYuZNnXDZ}4%kZiv!%)rzx;Mt3~S!k=6#8wKk9xsc2RsbV$Lkj zWCKtCwqq_H8mjs!+#mP3CXV_E&6*d#YwNS`n6{OGa=|r}+MK4!ArADa)N(~Vz$aje zDd|ziC0M-EvnN?&u645dB87b1AMQH?RI^Hs*r>cwZA(|c!=q|hgv;nNZf?B@iU35O z*Zim;32W8zZaqo4y3n33mi1)P9&v=wP-(7#%!dusLc0Ah-TJ39(n5LM0kE4PM?WG? z%hqA7X-kLQLl2N3MQ!!wSN3Rn6@I@bO|WfsUo%Vqdc-!E2~U(A`YN?uI2gq-Q?hiJ z#b>gZ_F-4J;Of4-W@D|MIB+`BsZ**%Z_{|MikYr4|JjsT@biw7nk519__*eBHu#|R zm82H;bo>ieNfoN13b{U{3y>%N1b$G;z~-yjR2;eiROj2rW}{%P?I&J(7h(-^6$T;Ym{y5$qAbosA1l0ccwyOUrHL=QgFm5lmY!wJjK z9b#-N#vaEZ9USDf^?p<{5t#hH?X`|~I?8vrVT|GuyQ$01z!x_5bG`O`V7CAcu><1F zUN8zT$M=Jj3sH_<4D=*NnMpdCPOD?`@4E4P3#p|j8?_z};nv`95jr*SvdAh;iS|s- zPJ?@9hskdfLDA_phBPCp5mCsP>bOJK@p!7V11EG;i-Gs;3QM+kS2bIE+4MMcxHuV6 zj4}pfD^=(b&lVpCJsR%$jk-9~%g)ji*CwEXG*PbmDgTW6UpgZqLA~GEEE#|{{iuam zZ{g|D!FYG4)kRF;RK>)ig+X@Z(1`}WNPxdu-3Lxg?&99#+p9$h<^Uxt}DOdyS7n#Ub`|Y$t&>qWn`2>qTtxt`rU1;m@1wLv~_ZP zD&lf}3YVMg^K#>m3H*j-rdU@iLz$*9Vgx6krc2p zC*g0A5;77TPR)3*>EK_)E*XWi+TY-HIn&cp^GRNME~8sOI!b>`qmVx(g{aaEgDk$M;Z?$JN$fb02Eob3To`vU5B@3SPo*T2an*rgPiSDIZA zP9#EWyuUX*+JPrN+46pDnS0fWm5F#S(DN#>OP2)P!8Zvk-uOl##rvn2pYsjv(9)=l z5yZC6LDxsFPAx=kM$!mB9}-*peVP7q^9Ky6Zt(q#Cq=QU>eu0`R3*0rS(M3_-vIa+ zzw90GvYDh#ILA3m6s^;IH2PQ+lS8&ZGkLLC=UccfD1LBq70o6gFx@v-(k6oz=v;=d z)f9yxPu!D{V7O;hR&?ct=ETa>qxT%QlT76_Qz?7)f-rFW{Pegec~bq=X=@@=>5Kj( zokNzV=~A*^N=m!Mj+~miVFjtr6}t?w{mo4ys49wp#(nny5vhoL@`%3ZNWDwzWD+uT z^&=Wo<@L{D+|yNn%yW4!44l+R*-eW-ab~jycYAU5t#$AYqA~&$D9wJ3&<=E$s*6v z`ta`vJbO~pFTE@Q8mX<7m?W-UrgDXJMk4$P&<|%RPj)nRbnt+7hDsB)VUt`kEeJH%$C|+S zl*tnw=I2{(C5yWi+3?U`n*38-)>K2|5!#sI&Mz16CyVBIr{mrpvP{1K9F8EkS85Ra-t@Jb;u|8(&9!2P~{KhBC??8>IevLc-?{hpp&&?3OPZ`x*=ju5lx?!G z07^0y7qGlr7nP=lwE{`N>@Jj|?1N-FCHeSG@{)~y%eVinS5)!LkVc1a$#OTGf~3u; zxqRzCx1t#a2u7aOW<8F^I}f=%=+?Y8B(S?>{G$2N8A7s zFuz(VC?7~tMFcq8)AuHv@#48d-!wO*569L4uOzi_(uVYdp5@j1y0*pTxL#Zr0O1+k zc($-PjLH7$K9ckTMJ74Mn=&Gm_ZBu!1-xRp=FsDRDL%U>N`-xTz(#Oo&C82EW{WIi}^ip_U%Ya7QG;A0R zGf(qW(su2(30Dnm6UEdM3Q8A>@et2ULPa$tFyc4F!U@tVY;CKY5~OB>G?@NfZ!CWY zkILtBs)8#RlSzX#$|ZUBLd_drKhzJmtg6WVj2A`FG9%(U`RLZl+6e}sdRp*0@oyWZ zGgjbh<2Z?1VK;1~^t3%0wWu&_0*$-K@bviue;TYa^I~+HW#$99eCYDwYA=p{*9MHX zHdQEQ9RU{Id47EPtJp#aN=CP_Q#Z|?R&QncbW{v9KPx`N3Wji#ci_^jaoN2B<^dAQ zt0@Pe&Z9U8hT6aLy!Xoh4{`ev9Tq-=My9ioPrh-L@NT8*3Q>I33s&iCi2d@nQ*WuW zv?6_v1LryV*vS7%lqP#+c`oxHrATCdqvWaHkm5CZ8jINfyp26S0BAuCyPsN5UQDW! z?NqVAA--z?7Tawuo5|HW?ui_D4g^?Q6;CSk;wNX;)U_RT9PYDQMdVGLni~%pNU{fT z%Njz1qXVW<&BF{gmqcdv=yv57sIM!k!u7EU$mEG;T#;Mx0fB|#Z$Mg$q!Jja8PBC7 z*M!dRLK&zHA_0vP6tI2QLCr|16r^n;lZMrI>>107SXZ38={Nw6gd(C~q zhFG@AK8d_(7iz^wo}Uncrqe%}hyht(nwv)NbHjpWy(pS01r@e8MjE}NZg)C#4lIp# zfdmaS-hpXqjOGgcUfIq-LqK|4zjcmhWa{c`w7+_J)K7A;eA+0mJLB6MuBV;VOl@B`^r!z(FyyIQqQ|#6*Z%x_)F4Va*fzPhN&ZkbieewNI?|Y5yD4 zc@@sbkcSky03&2A@?o2Znv`88B8?+Gy3H2)T1Tpy+a4oc0splS?bn+{xnt!lq{@?U zLsV>Rg^(?rH4kDEC_b#Fgai&Mcb0mq24P}+;MA36fNJ8>(g2d~FKqP-8sFk5D$V4( zbKKU*GQ>P-wH-j}!lOy2ISgHh-=Mw~_CFx)Z#Q-gad04F8mmK|D0FGsZ~XqFha(r+ z2;pri9o4LdD>tNJe*GUk_Gz!P!Cc#O%=4%1>k&@7X}#2}Y%jhumCXv^IC+~RR)eM;+-aiWg`yd;dp}P};aNUpu>8)o({ljBSBk&73*Fc^K$+R*cjOS|s_@4N9gl(Q4?M;nK9) z1;>2zpnT*Dqv!c1g~1XF&)!jaQ7FbVL$d+^;-c4Wsuh>)|JjA20d~P64v<+$=mlux za`}1f_OFwag^I*HMt0c7waqS6;GO-z84Ar+QB2Ox^? zc~!mJarn2TN@=_z4T*BBnh`NFZCVOUAGfbG%a&oQtip?;O5aiDSdB`H6-%X6KrY~y zI#=E|+x(z0+QOwDrLC!~y&r0+1n?`e+57{b++FWLVs9ZiYXzeh5E=_wsSSe()ibVj zY_c{Wb5bK}pqSRKU6hOy6p$CLj{Nv>(4B7rF^^1rk9CIxopZ)D6c^D%vLMyF{+sS- zhuy{~L|Uaul<&^uUxi@H-W!)eO4)DB&UF=K+tCmGD?|;58BTC+XPu!Y+D`AR-yTl?K?lol?U`N$^Pc3&16R^0xr* z2d6nSy0o-`2c6%cFkUUb04rhwMH`$?8?`jdhX5>Xqp!FS^aC|5bbAlFmQY>75L3`j zO6V#?W5s+Bbz7g0#z4l z*-TSCQjIsNpJ!{UB9_3NIvA@R8MOQp2zB zO@lT*ZGHMADXp+P#kX-_dVcm9TAUNS;1Y|@VO(}s+Y4on=kmAu3`(j5fkM?w!C3nr z3Dje7j^IRZ$WNA<%wagOWiH)>wym=C{I3Zz-5H>g_AyY7q!B)hGA?tgjrtv<`X&C9 zW15}$#CdC)lYKGPC;#GyUArQN?IOyQMVLaQG75>@=>rp|{mmK8S6)jXM8s#LqzWrJ zgSj$F%+;gCG~6TI7pl9awA><{=+2!NI92xs)T=U5+vC+=%(a7wpDyArCcL{5Dd3sz zbWxV~AP?8n!)c!m|D~5<9Ld(%zK3Z6Dtn2u+5)NH zSOChKoZ6fj19L%Nlzh9KM$6W$lj~wDbg_Go(UZMPdKTpuQpvytw<(%iWQF#2`(3yS z9S;%-@gU^uTOUwvQP8_U^p2oHv~B|@NLJFLjNWOlz9p`gL`&=2zzy6e26o&#Q=?p; zLbhXS?l$bUozw#J!S4Y@#h4OOXng68Q3WT03=C1y?3QvmZ!yn_D4s1R@6nE)9u1)( z%`bi^0y$Wl>yB!`n^Af7TnlWY49yh#*>$VREF_I1f90*p8mz(j{foe=;={A~Jow40 zojs}xu-YZ9@&$^TNvEJB;BqT@nlQUmW`G7hs{(h<2or z!sYHl&rDAT70&b(okqI8=5HkT?c`AJ{%E0eF=zV!c`e~&0w?Pe0}?b8oz*D?dc>AD z_}qi=0v?odeJtP->6jS|pPoQ}OX&#qpses5M>%QA(?I#?Au8jgaH+NhPt-KS zygozTHYb%y2OsY53n#gd$X5e3mlHM%!gDj@5ImRqM1nSxHT<2ZW?vURd_@sKQ%czS zH2vIM`DXW3H+^6!0pwc}q{>Ua=5gtuD2{R1-C&O%v`^ACGy~;!J8b|dony7Ax1=xj zoJi||GR~dMWl91N$#5LJH67c+pxR(SfG5ZrkbKv(s-8EG=!aWPz*(s(U49#~@dnbI z?_wV%+#dU?Fj*R8QraOMfD3Gm|ApG{ZQ@A~6lvU>NLV>|_T|jhs2VU<%7||!!i@pS z(c)9IgWxJRb0XM^c(2CXZ~f6LQLx%8$UrIl8#&QZ-!BCi(sQ(1f2gi6=cD3iw>&!A z5@d&l&jB`Zx>2Dv=CYI*H1AiQ7-(SsWJ~SLvU+#zZ3(433eVGYM%{02w0Vx3ZTo#v z%H)8%ruD>4*BI9YLt*&?7!qdVd|}yy~I=3 zOgj&q)?`Y4s0$r04KPDh%G>zsC*Jk2qR3*(lLR()$;P`mvzTU_ZC*_911j!mscR{t z4mc#a&nwdq&G95x56}6%Ckg|cR<|`~nR_Y=Jg^&wLF!f`FTU+UgG<6VnMqVp^npB? zMWP6CeWaxOXh(}H)lCcUF!}U-Okm&}_%$h`?rsQpsYei_^j&U5@=`voWp^W}-kvfE zbvI}h()2u9?H!BT>wS;wI`&{vWqi$_-p=r;29IAB*~bb;W8g~hAz_$$<{mkD+KnW& zYKI3<+tlAbHY&v`K^gNbh!NbVwEc&@u8O=KA*M_;PW9f_YeWCHzl@)IhmB(@_eLI! z#^{bp*QiFrO!a%$xw+EZQ0Ddwr^Sjhyb$ZsD;glKWSfY}o%wugMWD72pu+o7z=Fw} z=r63gs`Pe<@ITJ9=(iB$N;W?*e?w_l zSu2LykBQmGw1}bxK9GH=WwQFPjzGR9_7HY#8yxYJ?g*2$%U&uls7tJ9w#-j@yNax2 zE5_}c+kq8yG9b+EDlhP&+J7^`+r=lP0PYOKhNfF)2y>WwiTZ8A`455h8un?n-w?i3 z>3Jx#Z`0zaZmLK|E#2q7gj_d89r4oxdnxL9t*s~#3o8@~v`h}AeotEbf{i52@NqZ4EA25ryXYS%7G8 zQnpEbnb2<5Vj9zlv_o9rTMbkOO!4Nc*C@8}ugbP}?j4e%1+8j>-2vHdhdG>WDB=Iw z?X$ja8je;Qw#@ei*={y}1bo8=C_UyJ@N-*G*-VICalyW3;yWAbvtG4!{Zqo&TSGLf zOxTri4XVDUW8tdREqB?EsNudU*X5@EoJSLl6ny8GM#Y}ycWG-GqfsV=t#1|@L|h`a zqtW+=#A()_yVY_DAX{)3PX8g?834D0wwdFm=OX#XUW~!P-=d!L;!9xOm3y0thXW$i#Fm3u zOj>KS$hcpy6mp$h5R$En8j)the*@X-tWJ|=5V-tpvd*z-CGOlJdVjff^!sGf zK{xMKPtH3A+?wz>r}rLW$Ymq~(MyZhkGeeO&dfE__#D6P_x^tuYubfSs97+yU2pNl zOMYhHK8=1UPW3pst>B{U)&pp4O}g3e8Mx(WC$$k%Ptg<~@FTX7{hT;Zz#lFXV;bN4 zOvBLG_gf8_gf@1J!L%M|o5V|uKYxJoEIOI=OwEwY$W%YG_yz=6&LNN{0>^CFdE=0aBO#BzAe%zq z9_o>#_ZAJDCy<{ld-gY%bMT|8l+)U>{t$w(G!yIDl9(s(R80ls`eR;nyB-KLYk*37 z-%2=jd8e0^Q2Q0|YH0Vh*@rMTnZr;838K{&vaO*2{&V_ukHhGw$$?~rAd zTQM)g1%Noe{#5!mOFYS=0682!X&7eq1?-bP5z#lgFT0~#L^_%lJ9BTyjzeWdh2@q z6?Qd^4z(YK+cavv{#MKxic>znKJCrKF4X&2j{2W~k+?j^k>q}7A3Ha-KYE?qGzMnr zv_r!&>QYd0(ZGDoF|n|s2FFvk9zod}nVMr1SO5Bw z-#zVGcDFZ-4~wcVNXPF99?uU!$a1809r55yjh-YZ|_+BI2@%(F?1_ zb>rYmJ|>xQBRGYoI=R0a`NG?Ci#UFB3+UDqo69US&O+BaR!xs>tDOl*);r#}nZEmC zXJcSL%Q-K@Z5{#w|1uI+Q3TW(p(n>3%~57erD0xHd>WV`b;833@PVM8u?>q6?Xwdz z^+uA(mNtF^CSY8}|8w(aJAdN*wIz}A>RPbUGlQmI@`nC5fqqD3Ekw^IafjHB>dX2e zJb;?^jtJn?9huTQ5dORT`)@b?vY~}0#Jgw~TzFN>TZ9z`I_s?^i9VINH0tP!hzX9^ z9#WZg1an_1z7P~-b@lNW?(mSNy8KtXnpU)XCG_9$GtYix-o2l!0&qP#7AUzBC)sF$ zBnlCRZAv{8uDN>B3(g=N1ZkU#6O%X!x*|-d)b0Kg$WUmZgIsHL$4D657ezAtv_+Vb z>FZ6!YG4|$2yP0EKNj0o14zh-n9ub>w%;37O%R`T9J06l2pGxNS`_2*<(8i${vJiM zBov1=VLE#-=xFTzgL;IxNtxZ0SZNPGz_#eVB}fpw9}6&FCtBBtOnExiXjhWr$`}Qs zzaMEllvw&}mz0PHcO)1y$Ddj+WC7iU6u1B(m`;hVYP~>NH}oJbT_SADR2ci~EDB0< zMp{>Lz$MT)#i>(E;hdFXNQp$>w5lga_IM;ca`jcq@4Lu@(5ir_kbgdfJvn+5pXv>Zr7;G!Yzp)uVn?S6 znL0nkI`X;X*w}S2M4y7(ZvAzF+jB2XYvObkm-|i-^_Su-9JTN@VKoGNn9@T$bm*5J znX5F&!{J{paIwr(lvBFX@%37@^^Jq{Q5*U>Ca!(H5r(BUMJUCXyrJ4A$JXvUcNfJ}TZW%?kKKIrskZ<4Y5*QtC~i50XKI$Lv4&=!Q5H?_RLXL+b?{tfY0Y9c zp>!?KQsnozM3+tIcq7m_F|B=guSbn@k@^jzIG);A5XCT^(Xqy;LvZ23{X76T$-{TL zQ*|5Kt+LTpKdWR|-X1m99$phjYC)OzChbw9p1tB^@ zU#&uKZqac>)*L*JvY$q!X1!CQP4Je;_bNA^o%0v4ebPC~fO<85y?@0q0H4dl-jRap z6W_^%0bA&qmaSoY?cx^Q^k(dY=@NAnwB4K;31yQdj1OSO4!4>C*S2QLbJ>Wg3rdrBPWUx9GTo*6+OPA#bqDIy|ds&t+s8XfRj-p?q^%#V~-a1E#zQ!__}DY*Z2 ze}4vo!*nfowe(PwIIae@Xea%X^Rzze^4dIaVMoS3N+-|wcZM&h z1`It3sQc-@2B!(>N10hY?Z)KucJVl}uWM4l5Bf*Y6IBk~rm=ZfEs5O5V6`??4`Qf? z1gdrPJ4w-XGodbnRSt7CIscM(R%VIplV3AhyCb3oCq05bNIRWbYjYOxmVo=|Ym3Lh zWW7)`l{?`>Ik#MVw1kxK4|c0$+OvzR;#V6>R-bIqp_C&do7HNnvx#V16IQhnV5Skh z0|M91HW{tmlt1#HAOCt?c+i~(!3pw1u4ij%nH7(i>@n7{_D3`my!XNoDaH`$0q zCQqk-^xjdt%3^OT&fUQ0tkHjx7;KqVq4`Wkz6T(s$QGh03rq&KA|RuK|o5Me*JGTzuEqpD@W_ z7Bwu3@$HG!8a*Mi^)@@j87sBi-HShyS6~i@v_V0@-oxDYBCRYf3kV^vIwH|f)v?_K z<`Prn+zh6QRI)PC27R6SKo*il1<{3&DY6fY<~CM!^Yu~qp*}3O&KAM0D5^8{DD-xs z8_=6bdWYD(B<$V{K>69TLcwDoCEkR^gMLdyHNT5uPPMpFvX8w5yDEwu$fk_{L`eoq zP;Dz^5Hb&~p>b|cZn*rw?1&$-zmESCDccu=q_7IAxC<&=vu*lY=GUgvqdVPqFIXvR z;a4&zem7drX4nFw;ZSJ&U^Z*6+E)hwM#`*qN(uvyi=y3UM0y{2Ni{03VaTc;b z9sVIY*8)rvtYCF$pDB5$-)+7o0o>Xme{zFs5fJRAkM?QCSr&7mj3N8hw~Ryxh@{b! z#DXhHPrCArm=-RW;dBHQU9}fP7+z)BCB~*8qQx7Rxsg8wfG3M@{KIiUXXs48G3yIm zK7c}jl^O+*8y-xzWq&yD#t`lgk`nEEg|Jo!T=DV7Pm}O9JEWKG%(fl#gDNd>iIg+n zC)k0>Jzv>Pa+awH1Up_8>;v2=1W+`4VhVcMu({KLM9{X2_3p|CAaZytA{vtI`tlg ziV+LSC8Ts+LxDA|Ti1?TYId(7+-#&2F}{_-2h{byv0p@o<G&9?ci?+#a*PI??53CNeG*DA4F zV^NLTO8f9QWt|U@>#F71F2N_p7(6cpJ^WwW?gtmjm3+IPTuXL`-^jRNRuJo%A;yN@ zYp%;d_cByt6zD%rKk|(6o=o@uB&=yn*p6TRQM6#F6Aap#StPZUa|A5UY=jTYv!!Uj zlBSTos7q;bLv0QI)~e@ez1F!c{{S$nv`&U<*M+NCNQZvMRh^pX2T6Re(ar|Ze4;X+ z`(YtNXH22FOgY$#RJ8s69MN+P7C?UTCFZSdqFD+BRov?uu0Ev2JQAl64UEnSr)RW5 z!d}lpb~X0=VZwxW0ndLTDSw{-OFD;Shb9Qy_>`U-p<*w&!G|O3Ec;TD?#qEaww(_O ziy2Z^G{_-X0x6x@H0JMwZS#M2XWl+|ovo1TClD&r<00jTI_Q$VprW9d3f$}xYmQ-d z^0qx~3+r^$Hap*J@P@&4#aHIm&7oXN=M9Z=i)%X`R$=iibuRQ zk(NI7i@HSw%ZBm)U4_|Dyo?8GAatFC82b~)@XeYB?;P|dzMwsPX3*R1jcxJL@=>19 zIkBZ`nLUPlgZ*iYLXdxo9M>O{8%|WS?1;io6yJeDK%SjVbOgsNd6UT2+Dh~`)IRgLvneyo?_i2itc&rgN(acI91D7{pPVvJLN)ll%8a{! z&DZ8)@;BZd-;}{f6V%v2pqUG0s+K|X{9`?OeYQ5qOKFn$7go~KBs!}Ubob$C<01I^ z3JPrM*hQhTf?%O?bc=c}SW(xR!i{6lkhAi~3=WVdD<{33AipzChzMiNPPL}ov1)gu zy}HCr-$Lsc+1!hnp1ve8Kh(FEy`*@j=DH02+xtR5>Z6s~%d;0{R)?LrPn$Gh(ZF?% z!vIstMl*JAUj#OR%)Jr2l&|U_WgOr_&Jfjyw5_i-M{)I!87an}`0J7ORgy+1_|#Rs*6!wg!V`Kkx+Exr4P z%8fhYb?v#0U5t%+msy!zCe?A$;L9WtU#x$g@Qf+6II~;HW7kHJ1h6&k*l$q_gkd-_ zIY8fwl67N%!tM~PDNkoTk?3tgRDEyAt(d7R^4V;VXWLDgvX@^^%IAE;A!0g6PiZ?2 zr04icODuDv9XRgc*8<)cF?v4b$~h$ijHzzgGS)N{3XN+y0A-?5oz^Ef_H|*443CXY zR`@G?tA{Ny!e9)P8tjcC#1EHC76UW}79@dswg}k#LN05Yhdp()7s_5D5~#T{y@&k3 zb){iPJU+{aZ{Ub9n!?5ZG=I}HQZ+-n^GM+h^O&H=1ctK^U}YaQxD-F=&o4zFP3w3% z`Z#~?i$04u`B635eBYZTbmbMg_efvXKufRo^{k@)U6k@TuBvSl{U<7q|M|TZO)mS| zbR&tmmV)c!PR?e%%<>-Zr=MZXzjKE?GsQ67c_zxPU5O@7$mqwBqG(%y0h_o2+J#<5 zi^B=uth4bN;3I1i-NhY2>-DhVu1l&EnJ;N4UrPsCyGrRt->GL(Mi=f@W+DXV_u<*C z6d_4)9W!d$hL+@8vdN1cxFWk5??fV^pkX}3$PUZ3*yH)CdsZcdn(j@aB{?fUr#>|ZxO6-wA zu$6}e8&4i}I8Q~6WOjCb^_UuB0lH4PYx=A9UOl?Ctvcxr%+}u_z|q&y?fTfsIDaZ_Ji z`+?+9cmAEYa!$+h(QTkYrnFqF#lFM{xgS#=K*1qB!@Dr^KNcm&sP3A!T%#+dmA9@V z^S;*@_UZ21aHSgOOdkjJ^zb0ha*+ElNT6#}XPQdWi&;bB_OCWd{ zX&WPV0+|MdKcec@#gozuwQbhnn2uO)_j&Jlbb6lnRCa zP#NlO+x@Ny*!D>|=gl6!;1)6rIsp%*#8T=|)!0p-ar6*W;~5?7twYQ@MT)`Ni#<&00d2Yw3v%AOmH{P;?&?$ zK%gp(l_dM0p!{H&6cO1b;-csiQ(Wx9sAZH{PwG!9v<1`-Q>f?7n#>WSu$F7DQ@+a2 zbVuwO_ho+@D+s4NlWi1OSMAM&#O}Z7qt;!JDna2b$HOye%ws41_#Vro@JBY@`ux-Oa*q)Y2-8_6%YioI;XfheawbG}?(F0?M zs^|5Ghw|QK8{9en%s`hYgWXNKL?hn}t3q;w{IK=$hrQ5bNxK1*!%s@N-{5zh^k;f3 zW@cv!#~99yLn37gn_B-ItW0~}LfmoaDPTM=7YAw5DDgv9OBd|MR52)lPtSWy`YgUD z$C9jKUcH;j>GK{$hfzOq=i3RU+yxjb0GlRf@QN;UIJmzGFW#as;Kr<3W78sMQ9M5A z(S)yBh8EoM1w`3oqG-fshxb8m6g2EEcE$O(9dgY; zb^5n_aNi2okEL3Q?{-jMCAaEhofhH`f^qnZGr>mqq|2hFw9D$#p;GZDML`3*`pC}Y zv=P-e6-L*Az>4v=m{*W^(-@0d)Qqr|ZBQ}uX zShI0ZJPXobeZ{XXN78*cl#cXFcg@@t8}L>NG>c0Etlb?_Sk|rHR_XUd^kL2hJo;NX zO6;s@3xm5KAn6NLvl;PJRqBrdD@N)+G3ODrd^G{8U$r^+C^Ps3)S`44MscuUeOyKk zhCv8d4LuU9bRZ1{m~w>0LJP41RjOI^Q+U?i$PSfG!6KTK;fYLZ+{9UyU%sk1N3V!A zh$^d_T5CkjlBW)f1Ax*;zoY#{K`y52nyqeKHkS#6D)vv~fuhg6?L<&;dVQ5rWp!l; z$juLM8K=uv?ys;gi_!K2sIU9ddO;CCrtKF3pIpQpX*MR96JyvlL7}t>8AM8GO&~tA zPZ!+M6D{4cRfx-DzjivSfq7nt+faxOXw#j|L%LRTi7kA1>xVQfb))|P7AqIcQ zHPXN&&UV7doe=Dl?Kv0AAjQ?vJr#n8ZR^*#*D-2uQbzNRwF)5^1~a|RO2oOy*FWge zm>0*pL-Kwj{5BG#VblwIHL~bH>UJ2=L4jQxcKt&eoow5I_#%uAa?rdM%N6KMW=(w< z6U3JeV$E!(2R>W{E+(!BumGg=+OCVk7DLr^d!R)^NT)I_f8*PvQKBp0=>G*@X4$oJ)ayP!3puyn%we{w=K@x z2%NN^bzd}&RVmKtI@`bzKkRaFnnNGKGIHT1za$OYN#tyzgouy=5eqbMe)zpT!R?Pb zG?SiT?N0XEjSlWKqhUQ?!`3w<`e|TW){J6D>h#T52)_*;rp2C{DHWc8(Yy!zN&!rj zfZVn-!$W`Gv|;Q9tuSc88(C#iix zj$tK(S=`ufA298fEQv0&bu&h0v?h0K!v21ZeNJ+>4CvTZI4#;T}ZGfUb{7KyehZ+e&WfCn`<5bLArIcE3H zZNS}r1cd^hEf%YW*X=%P=7^~Cz(X8bAGv?YX&pVN0^14k6$V>pceLuu>K6PM`^(b$ zwwA`}FsaA0dG8K@qVK@#>}DD(bW49Wiul-!Vi;&G1(C9yN>oH)K&Kx{xW zpaP$d67`Homl#!01NxSz8?0p=6ocT@#ZkED43hh~^Ir@SETI3lpMT^8_y$FX_hy-E z>!DGn2&I~XA3{N90$j<#6&7LTEvKBZF-Nl;arE-?v%^d2(PfKNfg5ZT$uiUv#}{)+ zRbi|HA>GC3`^Z0=va$XYfFWGvbx!r3x*F;h8hi`fLGb?`-%b!AAdGV=rf6B&uW{I- zxc9&uzGY4|LV|%)O!L2~hPlNL7LwbFlQr>@m;98CT|U?oY-O!2=xXghKT5xhF+&eb zj&!@PdDIY5sKpl7XU+Z$Y1I6s8U`Vh1K(NjsSgHNdnb93v5P9DI0S#oer1(eN=Jf!;{4Y$fItk5O*@x>vLoB}K|L8d}gQF79`M%R+SIb>fa~t zN{74e4AA)^vU>LbS_V#o^x>{QhCNCz9S}sCf~s>jOkR6a#1wd^i7EK%?L$9KcT`dFojq zqmz1(s!Ev7n>^6^M*-$+SNW-7O?iX_0C8o3VRm3Vc)MlTA$vY6G-nx~7^$Ej4qF{L z1+v@NupkclVKFEu*1>`80EUjhK!~9jv7Y7DjCYDtRT3hB3sxPvN_Kd~m`bwRscqe3 z#@<9wp`M)-4p!~&r@G+tWvvsw=)O1eDhn!f@sczD*B`yP#(svwG~SKE!}N#g{`F^q z7(opYY%AseG1ZUJwJKsnz~av%{Ev6xdTGnRkr)hiKDEHk^&HpbQV3kSf=@dp< zu_RHJDRTI2s7S2}?W4p9LJr+z4=oiU_B(quN_X5mbmela-HV~_PaEk!OZ#t-LUL_E zdzn#v${ZFIP=N2xEI!Z_T6pcV#!)hG+UDISY6WtdC8zYFn-Ot5RzlS-CEj^#A`x*9 zZ%u(rXqPURtPCcmlIwm#O~HO&^B3B>_G53FfF8@bd^8DM0?YkGJo78EXh;J zkO(@pE9U;HG9Ud63Le5)va!2I-NmioBiek@Y^J-y9H}Auaz><|K8hNeEzx}i>(Ke= zAhe?kLpBuZfL@kW#T4#NMMF9a~B{g z1vX1oVuJx(DWvd2FQ{Ng& zHW5U&@<72953S4frN`z;ZwgUF&sE&n(T%It(1KaIcHutw#UJ>&%%aK4Yjty~HAjn93-5l!kJlHHTeaJz*k#wAVrZm|-f(p>D`WhdK1G^VMG$~^KMl;P?_v`N>UY|znuI23{NayL8q-;MzXO2G z(vxbatf>sSk)N^`Na}q2y&lW&E-cc3abLGY66!hgZ-0)l5p+f5|LWDQLo@di-JwN9+M$ z8zRrO3iGUn?PQ7;X|E2c+n_MWqJp8dU@K$C=}ACUSeodh9Z5Qqh(rsPd1cwnm|<|n zf3uq~^3w!W7QlTMOA=k~=8kgRP!x-5wS;MO7%R%(d%6H9(Sml*=QcwrkiSK%j=A_uACtB+v!BE9R- z;t0D*JSDs5buSxG5fNTpZyWb2+Uc)m14nsQhq_j;TnR9P_Dqu$;(AyZhzili=7DB; zcq-_ZV4w%y3`e?J@?lzIuJphVL~8tnt9BAgwE`%bkPq za>}gwGO@@J5^=|vwF|@C*8B+EdQ-!RCzMq_;Snm*zD6Yi{+gIDx`4;+(V(XZ9)}%CVTkP#Ki*keI$MjoXz3j;&j;L74|~2I}_`47=QbSwAXnj?s#A~$nrVm zrix279y{&@k?V)fvUu2)8wKGQMo}uUG#^*^9ul(0vyF3eT7^ehxDA<}>|kl`oh6=; z2Xd}ji7;FcJ{`-CGmB5FM2N?06cj9H8uyvR>@7YEq;@t3MjaJ5gviOuj(vTcmp=+Ai~caLT3Ydm+9k311CO*t<-?B`jxNa zuhZ(0VhyS;)6CNsSysuREAOPa_^ZHFt=BgDeqKTk_;gOmQe3~@=oa0tcvHiOLR@Bc zNE0H2b-G=_fT%ok2br3C112l0LLl+Ur*FRS=6GVs{V03+PDSiK7s zH5HZ-zk0D5=%kVBe$kNDC?bwsdM=GsSxn@GMSP@S--6PRAgqMvyN@xtDTFjyRJFmcX+))oo=05`2-!$3KFF~I3i;x3vo0W-!)x8o@6hyAZsZ<6L^zCsyS!bI!flZM{qM=cGi%w zYlR-F0>p_KHLShMgoef1P5FQBBK7GROJ+aLgGtrS4|0JNOb*bfSc9PQ4MxBGTl}?= zMt^bD!HdQ0NXZoXs1+&eq5ma!t39e=U+^oW(W;d>5s8R^$8gt~U9&$xyQcTBg4-0X zB297ysg#;4QQAL!QJMCxy+nIu$}wRb$s(o~&47o&n)3bGkPcF*lm5fd0k^Cp+@h{U z+EA(i;C?XIa z{kW^xnuUo~u&(wTBA$Gu2Oop{=Idk?C+hB7o<}&=Q|Y4kO(dZn0ci^ggXeixaLOc4 z9AlLZ%-a&tMfwEa%x=zvTj-;kV(NEuT}|ctiY*(0soTyv>=(JFd>5B$OJgwWYxdMk z@k*F~-z?G3AR)Jpj-s>x!jfV;d272MjJ)eFzEYWVR%W!A2=$&k=MA5BS%99ely$YI z^OUes!|^KDF48P`sjj9%nIv46fcGz^Xr%K8mtIzq`?dh%aNn%AT8oaFm^QEQakVo* z%cF`{gM>Y-iBY>UD5h{^jC+ein84!i9aaQ|&#T8;&BPFaF7bo6b@8}Yy4WTSFtq9Egrm~O>#6t|e?%ZM}Ut)aTg#Uk4ZNH-KNSrAO ze$1$q@RZ&g`y9&UK1;GE(7!mc{1WH$-Qs2}J;6efXF(O7z%>u8W%XSqz3vD)ZrD$f zBFY;HNFWFL-H@ARW7$x__t7Jf-OK0r1b4S7)1f_L6?9do!AN$IWs%_=o=b67TUT;- z*NiS8#l<2EY#6Z!wb@?J*oCZx=7HtDX}$&|N5NuLck~*YU3RCbEj7e&7kH3lOWnp0 ztnl;GZ+}2N@LT$HKhO*07x-NxG4gNBTa9`#gIL;mb@61l)EC1d*rywG%wt>km7*o7 z<7t7goGcS9(i;rALJKnPFC<;0gQ~=F8`=S|ic$$99NebPd6Pas z{Ohbwytslx(OwHOlX3c+gAfzIe0SN0*XB>U;$-nIqC4PFyr@ znKhj7p(t^-&#lkAqHnXQUy^pqVCFNa>4WTPkXiXCUxGq6e;M9h$4%RJ2fr%p)HPfG zxvqUe$o78TF+RK<#KLUQ9mBULaJ0=fTNVX6##YARm{AAJ{U#2!Ya)}$Q)0;6!}hsk zkGqXZRp8b}_$n*>P%Z$MFkP5YiBN6tt+?r$6k@_FdAZRnDBh zeqn3P&fs=Z_?L97w(_c)Rm5O4O?h9?O!J!;vo!E92zR2aSaGLl$@;^oAj1ZcOR@U; z-X_&n25bTv83Jfo6r@_-zVO{TC2#D%3t8W=w3bx*EKYrJ1MU$WK_uFO<#3n>( zhTJAEz^SZb0tnQNc?Gcs-a9>78Y=y}4Q{CVu{bo=$0QObK|%iHsaF>)aom1#uMa%5 zK%F#TTcr$)4vmOpDpVY!NWP}SE^`7;Z*8EeRk~eNx$w=5_3VWKSy6!g&x}#!{Hs<# zo!(i<8nY$O4oc>4ZNwFxTshXm#C~;h9FdqoR}gZsfPk*73;{sM-3fb*M>+)Sc8|O` zdb#~j%*F-l;< zXfRDJgFq#*m?R?E<0tBBB*&aHWujDxNjH%Pm2T3QbX}#iq)q&4tq{k!ZLz-}_cr)H zWH>`7F+RjZSuZLuUbb5@r@PD|q*yKTj*J6mtxkl+KNe}7xIQ~g>x|Dm5NYq=BE8l? zj56JkeO7+D$P+tC781r!ke;2<0mkKuzT+9p+8m@I6%^fLN`Te=aE(w45G0FPBd zQ;%d~l%&l92X=3qNp|WZF)Rf3zFEEzcSDO#jfbNa8NGurpEjaL8%3Nk% z8&oE|=elFXK%X6`#b~82mmlu|%+=lj)lUyyBi&97@1X=xc+$Fd(u%I`*Czj6zE?Mk zS(2RNSxdf5>wlsJV>; z5!2E(Su8R7h|<1v9$&NdGBK?V7+54xuAH3+pfm|FO6P!98(Bsxjsq;NKJ!^l4++Wp zX-Azk;Sp@=A+nrXm|EdZ{&cgr(x=(Kb%eU6h0OWm&*#*P)xe@B8JhNIW;FednFme# z%m6|Jmo%}N5g0I=^Q3}w_Ru+$xL#SB9M#gpe$wWH$=)7xs~S;!R2vVF!DFqe?PEB# ztq%5Wp-Q5NeW1>fNibFFcAj<)$!y_>_5N}}$$!Vn6n@uTQ2$k_U+=w$FU#{^$O_ua@xxb0w9Ceq zRUc97BIF&wtX1}ECT_YQ)l#s_mFitz6JQA!`OHLUX9p?p%r9b|9rNd~ZBoJHoC6ui4N%N6N;U4y4Y0UxMLe|(kma4zRvjFxB&;E;lKI2)$jnU~A-oXg4%TW&tt7Pa z_A+60gHW3*Vd{#3I7>wsLZMHXTmu#@RMVDmGc5`p1YqX2@b}?hD&ZN!9a*V9oc}|5 zlnund&B^I)i8s}R@M>eUb7pW&OO2CiJ{eK7q{+;rOEhysCe3iUK1H2&`@tK9J6JXA zkkdh@9pU)H^Y9t%(v{cIAT2kM;bU6|Z?hX!OJA@+$$NPqB<)-DZz96=52wQvMbq#z zlHG#NDL&lfT4VCbpav*nHt)6g=uHu*X`|cD3zy>L+Q}=G%hZootpg!npm)B4@X_L~ zG<+sX(EYYe{Z*?t;2a8Ug>|JFsQWTLHO|{qw49ctP1UCAbt@bdG$v9|h$k%8Z)NzqOONeri z0+JJ!92CAFRr3uBbAsaOj|5)Te051zEfX-vUJFFpqPP9tmojD3)>*~6!d;5JzTyrM z<->17CVNrmE6RZnn7M6~sdVNYuFEQARq3Np^xG*ZV8-*3A1iX%z3jl43C+t)gILtljIP7u+HP1U?R;w|Uhsvlk%N<0~Au@yX% z>&sxdT)BdUeXt-J6pe(QWZO)m*-{OgK-=xUmq9Pw#Z0 zV9LM5#qoLYr{}I(;ks1*U4ED~_0RR&7Y;~>DRJXeWF(D;?I%T>Au%d>--K&X4zF!7 zQC;uLw1Jh*WXUG_`P`DWcY%PHCFKoCpX2t_pJxCJN@D2&f{;10g1*w4D8Otjv4BuPb*=K?D`X_=X?Ff;P>ZzkQ?kxbN?$X zqKAd!y~P$ufID+aLl@HnIU7dSR~M!O*D_xrmQqnCMi9%aLpvPJ_|%-|ZPYs4&A083 zXzmnTGC~n8Q_#3!P=Qe!j%siX=JI9YpN9axSdLp%4_k#E!geDMDYQ_GUj^pPImE8( z{kE=1kRea84M0m>aFtzj>jZ>0U)%Y3%$uPWXL1?qN8mP2ro#sc%ZJ=n?-SNkXmQ(E zXhGTb`rzf6ngCb)Kd(gpYkr6$w>1>lGu)pKwfnBFIx$2GZ>$TFv7Vla#Gsk4b>93) zb3i1KGL(`nE0-2+q>7s@;S!tn`r<=MXHQ`A4yK8(I&G~O!({A8nSh$OGl@Ak5fu%t z(pJu6k}M=UCLWgX3_6_*V?Obl`0y+lv?SuF2BtFBwM3O!@`uMwNPnevr;oh@IQq_H zeAjZjgWB0!r~K*YysQ=JJJtA@d+Eu{D64$D1xUu{uGO zNuWZVlPYt4s`F{a^00Ef0?O-Wka-rn2UXpsM|M^)xcS@3&RT8-QS=v-6IxtBsG4i$Wo~H7PsHy3>&tFfRmc zF1@U`pQ02!aI!htn~3t0!*|)u;1f(?lu^(6gE%sO*p73VT9F^8c*UaB?oAfbYPzK> zTTXo^D)OCARL87cr4V@dPU;zBbO=GJ*d-f01Z<%gkks(nbHbgMK{pkunCB@!Dc%hA zx*Fg}8tC3NQvD|$`#3C^gO;=J?Ez?{*dT(6`*ay7NrN(VRn#qI0m$G7wHBJ*psS>|DBn@OBx4ybKXSa3A}na73o zMq5xaI@x?zM-44Mhg6#4u8!akrr5~-a@`L7iS(F5reK^A(@W5G{^DJ9lZ)4ojd8-@ zq#GQF#fA8!$GZ3mI{n-$iSvpE?sxhQ&Y=|B<={Pgh07Ue%dTFQsMiK&GHb> zk(P+)G~u|QLNSl!G#@6mOOv;kCU=`JlNcR4yui7P}6_p+?t0O?|16&`7 zd{*^*0&Ru$EI~|0lQ=m6{M*+ zT~P!Ry3r!QtX$_|{>3b(YoaB7JE)gLd{*+ac?d6aBuvU)$SGQUlF1--ayUu)7rLph zwL!CHiJ|T?H6Im>3q*K^1=ZwzpZ3Fz%6t#y-wy;1dts3WBOO~vZMJ>x?F1h1djQ~V zHuFDCMMM}G9u~$Oi`yD*Zl{QsT0=z-dFiHGYP&DMp0))~Q#ISPx1o(oIon8{R3#SyKb3Gx#qIvRx)Y>+ zJ^W$=^!=lA-EwHB`@tU(J-W)ko@rk%R z8>gb|jy=Ja_H0bY&MBmZQI-aekE4c`ih3LxZ?tmn83x`1QoD3!B~^0pbvcwe#siS9 zgesHxr!Zfwj_ZH()Pr7R8UiYNa>xP5P0!kw2Gx*+@e1!XNEC=|OE6)?x?!b4;eOxm zJ)1VDUhc<`-pd-P1|lpB4$Z<@A0r*RE2gmlMB4jzqURF_&`ddvpk6T78Ebb-22T*m zv3Qp;8BRwc#yu?x2Z;E_@a~ZV^i2>Y&^6a#r<8m z)IbC4MEO900UO!dsKv>_^+l}&4K?UrNyP0d!_}ZJjG;roOH6AcR59N#&LE6~@&H63 zcU23NYQ!o1O*pZhENo;g1IBsa69J9dNY@Hr)w<5TPPw2wJYNKaS@Y(^2JH>l^z4=6 zhWICot5s<9?s2Z`KbCB;I>zUsmfrV%%FnWyJ0kHEtYZIG`@-&U$%P?z{n$YFy?5>rsoO}^e{U@Dq>YMbx0k|mmw2UT`z2@ z#6yhd-i=CYl+2f3*q{oi{>Qo_rk164bra8=&B}7#IilSia(gbPOYFM)6CV_7$SfBw zGxR+ZyGS;^;e0I|uM$$A7j=0*Aa$#Q?AHGEqdY4w-a@sgi_kb+6cODun3E)&)J!mB z>F^D9pLII_2!#(Tc#~3(bCHQ5L%V$YVBrq;;Evl!>cYZzyAj(p1=DI7g8*i_I?2_Y z;56SBO#k~#3r>13D3oQI=RljD)Pn2KgKjgC#*3BTBmxv#!~GQLY34d!shdR~44sNc zCr}~Gt5P5=9bCbj!NX*lFyI{cXcMV@2$-YpOr{;45nEu+S(+!cV#bk6LmLrGHS@|Q zL0J=gRJUuB=JN}*<3&P?2mjbfd4}T`S`s?H&{w{>dIY4aR^Z1hFKX||;<1z?m#}YP z<##Xsh{AsbP3Z20m+%2p7q0?XN&==|9xHeN0O|B-86svzze+yI9h$*|JJ%AcH~h)M z0V`Rz0;eS4(@<*TVosT_cmT8vB4()t`VJUq4&Uj7%4||d3=PNtD^RoZEiFsP#yUIa zARaPI7SoW>Dh~ho5{0FjC}Mt}ytg<&g?m<|II&elDj&ILmO=%4Ee-`T)9rJKHb8eDi4?j2Y#&;4SxjLt90arAjPs1^jBH0Ub5`2 zt8YsMJ=xTus5XA;(~IC})&jVIsItAy^lK36%&n4g45&l~Cni{?1-|fX@{RfAg8i>2 zNc1R%%BgxF%_NS4DkmqCZD@}A9tW9+3b=|tX$p4czd_YV6j=J`>A%eS0Cm=v1!hYt(#bW~f&$qXN)m}cBbb)-gWoVmjj2wc^WUU*HEqT_=lhd+c>1Oo1uo+LLN-LG>r-}I zQ@^md<+500biJYxXa#d)pHVYdN+iK3|G&P9YL|jjG?Qupa%vHX(e>`#jn|lfHHh`lS!QR9OnI zt1L?RU;b8celtMU@}(%fE$LU?g0ncZKTGNhzi8|v>*UJ|twgd=uzm;APtcaZ46^%4 z?`LXwQA8LtsPI?*Uyfi^F;6+rb{kp!i@t(�whZ)U9~#^Qth6x!)Y}^VQz(z`The z$rOJ-b=`iH{5}9z9^M##yyI_dvKc;KPztZqYDZNF$`J0U5;>f;c1i@+USgEG#@QiG zfL$YYflX|18zF$XR))ER~%lLgQ< zz#*I&?4biVfv|aei!sYbUr-qbN+xQiF3<0;JVLc-Ud9wlq}YG~)oZK)WQ*hJ{p+br zM}oYVZ_fg+32Osb&w_r}GLksiU18pR0U%U>7fh7y?*kx(p=96=gUvR?GqOLMEGLbL zY)tGCNB}bUYHyQT;q&_&^`1DL)>Ey+5r#=t=d`O&x!b7^VUfl|!VTK5EzYMLzYR1s z1W+Z6H+w*5gK{;*C-mYgG_Fn}xS!AA_>B9kfOlvn2DgR>kfv1X@VrP;W}E*+3bkXS zl=Qn)L0DTIS!$0e7AV^p5dv2m}XewYO3&RkMDD+<06J$zI*mM zki9Gg-R#v*5H)#(;dP_l{%N2;`lU_j0)$p)7lhEOV2b@K>MjOG`j+OX6Y(WmP5 z_4(PHK^P*zx&O7cy9`TRVE2T1M~wl<{H5Qc{dahjX+t-P{;1!oCwYLS#{Y&_em{Y{yml-4~|Tch14hb!o3 zkrcSQepLC~tsfk-L>ZOrKhW5CQs>w7)SWZ47ve$Jrt#(KODg4wwW-mvV=~FBtqzm_ z>OfQP+V~Za2z%MTbWNO;MKZ3DSL!YcQn{~mE2p?5j)DwE0K4rIu}8H!@!`JSbh;8$ zkr?`K3yfkMxVL2e^vqjDB1>9H?y zx75I&HWqEXeM%0zMdDcdbLr})o|Fz-(}p`T0v4h)e4WO`L;HW!a&5!@1AoTZHHEIz z5$Va_Z%PvL^~7s@gH^{4+GLq;aSsH`rIZbg{ly&VrpxuO2;YW#%AR8fL-D*HGgsu0 zZkC(Z)qmkRemY@Osxk8QrtR@$DBzUmeJFK!LWHvBTg=`=u6S&1hg*J!0=9)WCU_>_ zAs{5vQ%k9by7!XnOmLF<3SJkscsHajkLkW)*DHf*i*h?ZUt^(G5WhUU#*SNeXC-9qM6*UeG=Om zz;S%0?+7c<02cuM%NWqN!rZ>1O9Zwd+$-;LPpsGCX-85P^$V%xQ{^PYNl$uy#Xw(&dy zJ+O{h*w%7VN947Pyp=m;jXc?p-L4>EkMvW59)vhse^ymi7xn-}UO@4`;1i*Hm{ZD; zR{@fxjpdH@U)9VIf6mUUQ6^z^I$s?+=wP{uf0l3NE5wtO52z{Ad+XtBhT==sbgdc; zSsN_Z4=G_If!Iq!PXbxLP?KdaEXC`jE$S1R41F?@s^cO_;n0x2cQ>lcr2ZPztCdps z&coC5x8o3|vW2dPyi*eyA{1nx(oT&wrHNw4694M;0>&t7VNA`*sQi$0mm-x;gHJw_ zkQ&CiK*lS=>*=02{6j5$RJ=f&6LMe2x+`dW2wWPwRIgQ8xH^E+d4oqn{_NP+4`n@4 z3Dhjch{rmHQ$xYNgT52ur%wKYc~3v1MBiZT`73~~=Db%?EJRihSkfu;+S&Le$tG{L z*Mo>=^MPzuIVI_`cCQ+v#f(JLk6e*}X%bRFIx3jbbX@p2#M9AI0fiiQBdzL?yF3jv zdT=*tt+*SMt`Q)*=i`e)7ki=|9Vj}t?EI_km;{bm`de*Tnb*o%5AWcUeWe@^{ZsOE zP$BmX{S4IM+)-y5GU)o-OPo^iD9``7u$_Tih!quf%s!jbdt_VQPR`fmHr9iO*o!i} zQlV~peTBMW*n)vuF8InBY=qR`ifn!69%v}fL=Nl%^MRX%$w&~|1i;U2R!B;qbD7QcZeLK&JiOAa9ND6K3p& zM!6zwo4ftPxZkWFcX-cNPsz^M-*=$E^>Dqv7NpU#(IssnF`M`~^|hB0s;aTO;8T9( zI(0)PXYWLhmh`{n&Xhs+<#8DQc{piH9rBbFngM`Hg?h(Je>*~ZMmoREU@!AD8!G1M zI563K3>sUb*|Dy%WCeiOEvB-~?dBnc#p!3Q($TZ-V31EcQ5$~{6YrRluCw=CVb6QN zISIaJ_s?9Hlp5q;QA4?DT)G1K=@t8QNJNM=mRYOaCpW+1k)A(@7bG_Em1JqUrb}RC-eFSBB-qYjBBo z&g+^1=TD?0c`XYWYQE=EA})b`Gm@Lt`X|-L#)3eKmPz8(Vb=Hyx{@X6k%)kqcdIpD z&$Cj%Rz?rV_3HhY4I-2m@J<#(-o;`2gA-wteKumXmUbLFLN26u5ouY}iH(Jk8!vaF zh%ynPQsc=C2N)rfSql5eWw2#Cnjf_#2|3I1Iy#adk(G&>Um4f&-2PbPmY0LD1QwVy zBBX3%>UM7a(=41uw&VsQw*?epj?4_)hIi+J`OM}!p#kudzqI+PAmllRS{|| zxT$|%TNR5I%ulg0XK#4eBnRby?z-m^!{cI0V{(m&VUb~ zvZfZZhFADw9GXS7)#QYIyA-`EG2GlNc!`?jZ|5mfnL1}2S<>?%OzgNxj{vHknm&hu zT51PAd~8%SIonlS_+}p-U;jx9tKJhgNifQ*>uzs}BM8S57{08BpswB_Qc10My!y<0 z*U3unkZ<&L9H!j4izZ#Sz8MuWG~e&jS~Cquu2^R&`g za$A-oj5du=Az#|2Xs@;67qlqfl|P0gwj8_Xi?2`pa)v=O|;L+c`C ztSkZ$9-tb^c@E3f=xzk!g+qt`ur_3ME)jU2@3&5nj}dA^^>6?am2vSMYE7!$%uZiy z40M;+^>nfm$2MbYh0KBWow*B^Du;W7^*5P~oDIbpohC8!XEGjCBUD-po-cM<fc`N0v8g9lp_hsHHu>lSEU&S(q9SEJU} zH4Bq4(`ocB-=+@Ac+B>?+rmq#mgI2w;e}(cEZP4P>Vt38j2gYRA5Hh`*P}a45=qLh z$jKqY0raV}$_pE~TbGBFOw9+j9PBwuef}R`WlJqgxSX{p`< zQ&^$Y-P69mj3B^pL=nq6QGNnu9f+} zxU|MK>+V&k1Nl~0BW5~qYbKuiq2$lT|obo%V&yX>Ov2m87H z*~jJ|sgBZrlDZ}3oVnGAwX!a=Mh+nA$l#@Mn zpsrKblSdRIEXG2SlYJe};5sd6CPRk=C%iY{0KcFRLPc&Gy*|D-J0kyFA-dL-Glb|E znzE~!0KsmV=-B{LHcuOMJmcXLk1iK&W=EGTZN96FbF)MHkC4Zy+Kp`RTIg{$XF_ll z(*wbmjoznw^dJVSZ3M+Y;yuA)l`kVQ2hOdX*p+QNKo#kz-=o{ZdeHX!FbZcd3{gQY zYlpsfmD9jBe-Mb;Li6#zjg||P2nVZc(x>n{NAc=m@mB8ty7V|WA}=EZNfq;QcCDmc zEvMpWGMiGj??{3kuGQZVr)~K-=-6u+uEx^_$G;(uo@JwLN%-7b zS?vP?f}HxV;4CH0G5atXVWtvaVO5bzOL~PG(|>k$l2w69BBtR`Nq))DB`(fBW7;XP zxIKBY7OriAaO*YQ!G;{-pX6gOsV$%yD5uQ<-MI(|o?iHWzQ?f=&E|N3Tx}>IvvMRHH1~;ibbeBvZMyEAGynu&u*g^v4%M z!XaA0yC$`9p+o>eefzh<1Ft>buGPG^1!Wn{y!^qv_Lgy59KYa&xvG`25p5SUD%9D4 zi#)b0WYR1!uy!z};*^BiQB(MTet6RrHV8NPq5ou{Ip;8%-_brRk^7|NPC_hWx(?;A zf`$}9d|K)Z7M#L+J8E)#TFN;8mEDMQEiE`&iRW%I8w`c6#bHs&V@ltLA1b(jduJIz zKLuL5r6G9Q-T<#)X{L}cm)diV;2Rz!$a2cQEOs;!0X5{~Sl+c=+uW<`iee=*r1pPD z{Z<%ujDe{qbQ*|2z?$Nu8|t`jsgI%yTh95>2Bh<@1>`O=wW*jk@8RaMm?O@1nu>|V zj3*_W#Jx+kK%`Bx;W(O(yXX1x07pQ$zf?B71{jnM7SKNOKvN3Vkh?Odk5yUg1secf z!Ps8^TFtS?MteJ#4`{hW;GAntB~LAbT5jV6f$E>87gac1rJjTREgCRB@u>@{lbvfXMRUkAdonC&m7lnW z;^||iEyCzasz`i%QXwZVyT$kcFSL<-OG}P-dPJt6>pi|?6C#23fh5P=XtaV4_TrWq z92RI~E+es9O@7W%NM_;{_Rk;ArVlQSqb5spc3_gzV8hn%F;Q*_T&mb>Vn7DYB{lxz zwascFQ)uZKea1s_X$e?=R2oO*QjRr8X(6Kv&#*O3^h~}ujsJ3a;giRN=-4RUl4uc?hdzz$E~vQl^tEdY&E;!pdhC^HfjrT z2sJBxpwMgBdK6_uA*4Kau@_T?&hFp#Xjn{IloDR0Yd(dnar2CGm!;;g+j#Jk_-uI7 zWTcq1RXgIqmp&GAbSlx|upN$`DXNo$;hI~PWNWi24-~!?MJt!r*~(A^ewpx)y(m5ai20pO?FJXhE^<9jCie#KcL$CjPo41Bo7TlQuJuY0 zNdG2x)%Ih;{eIS4*6Z-SZKQC4%6}WZ^X?B>b9JfN=_3C0jSXToBYJA^7tEaO-@y2` zVVTHih8vXv|2%JL#AI10o=G1mf$5N_t=5NOmKI>BVxtOpO@D+#ZE>sk(OeN>{SYb~w;p>Jr%Dwj!q00=56~(bg(9U;)oa{oo|aVqQG#A>jIXq|+}RD#rU^rCSb`Zdb$;*ge%L&ZHeB&gzT z^$0mcXUS_BJ;&8v3(R!}Z%SJ?gK7&WoWVC( zd{CM)>aIk~tkj|n+8ZZpOVQW5f)VM1w|eSq80O1_95~uUKJ|kX3gB7sYO(cf{s;0_ z{1l8S`r&cqO)mJ~J`-uGrii*M;{I9>dH)LFK7R_XTPC=MwF7GhkDsNUJ@faMh8c86XEy|k)r_!W5c~Dy742r5wkG>TGp-YB=Fq+RFIFC zlTMT!YaOY`_Nn$-64OgrP0*M$Hs?~ol_2V-l9YJEGq-T`59^mRE3~n(dnM|#SlMWW z-&Ine5*-4n()gZ}G;O}M zW8z2$5AK@t#(N9exFg$GU9pIQ@km(68}Sfy9Ob9z27|JKm0*BudI!>DPe|vo1TIb^ zKp%YtK>L9zQO}maP=lOAs;j-{RC1BF@p zqPdbWXnc5L7Gdf=i01(FVKsYn75_lwi2ii}C%qhwZF>UYrLff|#L|U6dru%^HjD%l>Y?}WV0^cCoA9wn!%nj zdVntW#u7*6$A3O(C1Olz+tF6CkC+w7VQva#{dGN=O8I3tNNedbQ*qfthVug0bu&6d z-k>OG^4YJCjxRUk-p9}_+qS>oWITsgxC3OWK;JzVOL%egvo5p7sWz%#vl$i@+h)}i zy%c(44<>DLOq*Utk-xv^H)B7JggdlzVGD;K*6DR{cFb`&e}Lg}R?XA?X2JgPWuSzn za!RHv;qf_Sspi-F@*|!L@GAe11QZ{}60wCXyhbKwBg_xizC7BP!6yKJXz587%FYjB zsw;Q-!udc|PMY>Pjrt)x9|!I@iA8VACRrA++1Cc%P5i?SbZz0yDk%paLl~@~N04Bc zqbk4YRkvCnu(Om%6;!mcF$pa+QENDGNe-o1k(6sk3U%Z#{cbL!xH)KTG?rRBoZfAv z)$1MmunQl!l3H}!;I(?5kQHmlOwIC-CMpg-u@G#BHL>Q#1^-9yOr$dlIkY5}GIUOM zP8!CSw`x{4vfdsDU8g6>9THXW6WVR+v)FZNivVRA^tznc$>_XC$jM`~HFymgi*4`q zi?Qlu{_q)2_aLPkm{!K6duymjS3>YUhkobF8(dOslMsF)pm`juYmeW8@xfh-HG#8b z{^N2=p86rhZT`k1=1<#UzH~@GO=L%9y-1x^Iap6p8o$NJUL_|JM?fCS(xzSdE_s?%(Ok21Mmp%loD!^yxlpB#&%!<%Kwxhh}7=wm`fIf)Zq`Qooz zy};k&wKV5&<8`GgxDoIb?%S2`XH!;t<>M2y>C`R@L^|9mkifK}b}{;c(5yE&F5@OJ zs7#B>7&u*%EacmuXfdlR+@onL<+TC79po5`pZZvzs~S$JZ35H^Blj^CixWScDU)UB zYIM{C#!UK#P5DKcTgk(%Lhfg}uO5dxgU-t`QJ6ptnlmzLgRKd1+}ky+SZ>y!rE#gydH9-Hi5G4kmmOi;ZhHWe zq(OEE390Tna5ltYFw{SOz@AWrONs6%$rVH!)yXN>(VA+LD@$M@cX(L<@k5n`?pfRF z!$=V(y|WKEuFdvyW$Id1FeC%;&~=0iDYo;UT-hA}1P;K-RpKy)Pim6*st5z-Wsy39 zSKe)Y)fiqrLzI;?PpNBn>(gHmM_V?$AG$fq#%gWQs=pdB;9cG7Sy{n@R+Vx>#bVt_bGKV=d6hK`|C}sdC2_@Eq&@ORItx~}}3mP@JE|8eiPvXmk z4blfwWs7k*aa665`WOkGJ3jm@b<#+aTOsB#6a|l@bg4Uy1)Y<=U7*`-aax_f_K(s6 z&3HMC0br!%3%>zt)!(BZG(XPke)U}W@`%l<3kHuVzWmZmAHbt8gM4A+q_CW8E%vlf z4J1@`1?HHGgANj6wQ(qeq`ixqv=@zyxEGeKvmO#~Tc4B}?m#lP8Zz4AA2EoOw^25Z z=koe0Sz(@0oo$&U$W|zK$a9zLE5O3(?9({{1w@4A=e*q9kN`8Z+{Y!IwI+LJ{CMjV zMMlv;?wP1^Ejp(w0lZQMIjJ{s4QW7w`ooA03~UcPfHx$Cd|NLqo5LQKi4;y^B(TzPGB zR7Nf|lM+v7ORNz9Uk#r6@P7+3Rv_sI1*&@s+rO@%dzM5iPCKR{%EWMvlEv&R$lkw9 zIx3LLQ@79|k%Wy4K=0|zCV7h<5ZEw;5q8~uedCY|l`aNGvdu4}TTdv9C2|YtSk+10 zV|0~O{`^u~9P7+&*a`VB2x@$wVagisolW>#7T@r-oit+ppjx+DgwzQ7@0sioTshYa zl9LYLi3%r7HuozRmMRyuw@r|e5B>sL?xbnq(w5HS^hXyR2B1ZlQAl-?Aa#&&Me#jm z4UNOs<vkYCWXAu~dr0-fjmiP9e8Kv`w=n=(jQ)mfO;m0~52%$^+&x|d3OCVe ztyocrn6_RUKq0gS`I}E&$RK5vV{p`rxb)kyDPaz*dgOhF%NzGDb$gUyVXI&l^yshG zPdX1pmUgVaHFhm9tZIdkp1FVn3ci-pz9FHvc1JOkPIRcF{z{KTu|ktJDqWRUyuL-H!LUG3FR`9qL$lx{#d9X64yNufC7Cgo|yq{Azmf{ zG8p~9Gbn;c*7?*Ir0@MXJ%9QEPHV~P1Dh20Z z(~>}$PV=uV%lP67_fRJ-iI*523T`VAA z^<9No**m|LhIHvH$wLhZt#h53xrywjSfbD%yF9sv2|y%Yj}-)_z|Y|H#9(2oh9&@) zA6~8W+GJbUp5mtrO?9DRpJAn$;=)N+0Y&6P@zMJbG>s!8`I21sJrvc}VxME1Uj@cH z)F9(%pz6Aitl54~7*MG^*TttNb*tVf2#oePU%>G(1)UvE*J+<{wOB~_6q4xhJ>R(= zuLXoW^qeOpb6 z{=yAsR8%6VDjNI9)5G3*8PW*Vk76ZAh8-O=*2X-8Agq_R9TLAp18bxLnYqD=sR0%) zcWCouO*0&gmB$!HJy`m~?5kpRjS~#;ZhRkt&e(v)9PBikC+3xV^8ZbRd0ZQ`FII>& zn7uUy{M-?G1|SK!(sf(zCK_i)9am#8ma3$3bMB3-hzdzyk=wtX75U^wz1Za*GBa&% z0X!g?LClS^o**5vA33IqMlRBdMv`k#-}=-<@_Tbs16zc zaGo#ulb5|&1h)%BMD{Y8+9~R%Z&6@r4?V}#-+V@`e&lyfKfTNI%GQ^AL?*&&bctYV zWK5|X^I4AC7@&&Y*Ya-|N3a8dZKhsttkTE{9XQ@m57@!+KoG82)^>`S+^F1!<-gWW z%+qU`uoQB|{-H{X!|_RdNopD@My1LK_B@`CO+?Z0DHTWZxbuNcwb4k{uPkA6H6sf%LRtuT7kH>zh5=CsXosUXJRzy z){=6uklS_0Cc)CvY+nM1J(c7@DIbz`B@(Ppt+&S0ZveOc=;2tP5Q1o;(dSdzXv)q% zK;NaNp#t!P@_*oHcTT1D0&qE2G`#V0qOdgA<#w-YUr)Fe@8?7B+$hPBMJt(zr{iAX z02&3lBgukkGi$mE<2!1pw&@MR(zDoF^hjQ4JV^n&9zC^LoFTotkvIk4x8w%yWG&Z9 zxrOUQ75;SJx1bL1&UWnq>iEv=>G8*dKgO4PdHl#xu zRWF^gN&HkupVgsju7BZ*ZB=UpJRAO>6l6!H0Yk}okTny%V zOolx)1#_`;(`$nraH_=bm{5cD)LPCIH^yhCTO(lpaNkY7K)+;f6uGu)3vIa&z_H^x z{SdLwewLqy9Al0d^kl0A)G{8722y2lFkm#QQ%-!fxHcy6!@i+1_&l9I6UU>TSX%2# z5Zo`iqK<#5_E^N@`A*YaXxcs#bJ8m<#e+ZJDh-elRq&MYi$B9EFs>j<6zV;1mCKdn zV06#@c?{~&s2B;nI%+iCvNgrKeGoM5@`)z z^M`zfJs4<*=Y;)8jAL^1LHZ1?%#7017czz;ht&Kqch~nZ87`@nT5` zL|whV)B=6@6(o4LaBwX7KE@?cGD0q&X59tMifygYv(;&rx zuu&#L`lf2@YwZa1_F3o?N|R4{_mixQP)Q}OWq|FSh^6HMTEzOTa&hb|;fW06K(=7G z6paKDS3=ADO8=x5>f>kCkEB6szzaVN!t4yFRujmOW=EScI8avw=ELbDc*x zwR2QO4n`|PGS!LuP4AWW^I?(?tYR~(gYLbrT=jk$`l(;Rn3iuFieVq-bqw7wXVf62 z*@R|dT*Hh7o=-x08A7} zj6TZ1Y1n2n=ur1~K)vxyWhiO2Tm_%A!mlW|PrLJgzgEeTwq`KO$^DX1wWi#_*_`l`euW5wWmaUW}sqSJRPx z{Q>>bj{leDX6+0Smrm4~_s6X-a^hT;QORiH;Bh&WW1Xt90PHx^rdSAQ9vJKxjiuVl z>$VZG%yQ|^zwOA8qW<<72Q>E`WD=Ud;!O5|?(J~g>-BgBfqjeGSZIYxW$O`{BAi(lRe=DY2sm17&(jbEHD!V35)9Pgs6OGqvV z$?lMiMt_w@WF~7DX?(pe|GUfZf3E@4ow>wBYV(6BCNd`Bkk(OKnb7)JXu$UB*gFO# z-!0K^vvSe;tu?;ORV$q+?bE3aW#O6Grf-X3Jl1QGXa)?MQUEGDZck_tdg$OSf}J8^ z)LMv`&ZgM%u`9r1g6S%NKo=V-#0HIfzsn(@Etd<3hn~t%2WuMQ$E2Rmvu*-ZJ3X8E zJztN>2bGJeCf>wsT5Ua|CphGw4eV`;*K)|0a9Uns|1~otafaw!P!ZBuBTPuE)1>}n;jOMJ&KnidqpeMj&sDek>`Y(j+ zi)CszTf|-z(I=ECLH`1UxX9rgE|QJQNHyC+hWylDt}UIx6%L6!*x4H~9WqT4;*Y{5 z1Ig9>M*%qHFdIZVVisK5sVh{N|3USsdFWB1$#9(AZ)`i2sttiI4E9Fcy927nJeySy~`QCPqllXn|s7`_7t#8~o7MyUcHTv3_R*TCr9B!V}ooNe$$?h|?{F5w_pF}5+PSfl~K^)U8m+zk?cwg>zt;NYeZ@g=xvI2#7K^cHp@qR2qL?C|8Ge zF?m@G4WUFy4JA4k)0b!Vp|tHQ2;*^8dHDOGB=xV3*WI#yk29I<7};^EuNU00JNqKm zy>@|QYdp{_49FSs7D*;HS1EKr7R^#_&mB z_|bjau2I+>(mecJ%(BT;N_So-{SHxjA5Ft(`J&FpLl+NLz0umgk$yP+@#{;zj#DNz z6c3ar6RF9JgCB8AUJ%0O#tvQ;B#^wwjrQE6e(o=#?k@ zWYT|MKj$iNSf#`ViGjTu$p)1dy>}7uz4kq}tb|&d za^dv_hmiE&g(BLFMDIk%=LSe&CVko~PMrHu(v*+7&V;jx0%8`BKb9fxKxClRDhsbm z7M*brluMUQghN~K6Irh0!dn&R4OeO@4=V!&0fNqif5h+U+^_w25W75klw3a`3QY@u ziELwXYx11=<=IFXF^6RA6*q&iu3f3jTuAC9iWY>A=*y7rDXrOb1~0G{x1}UPxXhRtDOqpRQXK7xEnD!)P*UCRLdULcCP9TYTXatE26;`G1&(1z*elv z&-{&n82Gfipu?lr$oMp{d4!0gGphGIgc946bOYk?2dy-wvjkTGOw-Dn~aLQbu^Kf+#h+@}5( zImhIL*fm!&E2;OQE!KxjrJ{|8%hAfw9T?gZvt+n6oK>{VRq@NGZaR^B+$ynFT9CM< zyd9uG=b5{CQ>LE~DxRMZWt9EOs|s9F*6E#UST^E_7zZc21k6baeY<1%kBiI%6K<@r)uHXY&Ns$rMP-i~2B zX##rCXYFnz;KLF@a>l)Qs^S_T07iKMgl~|f%+UpXJotMUZ}v{>Yv8QpM*$wZUC~AM za1Fx!p5nR+c=q4J;X*kk`d1|K2tT7QANlx%I*}TpV0)lX%{zd|pWLQZO&6UPyk7X8 zd$l#*F)~a59<405E&0oTMO%5HIwxB@q;wIJ^Dj7;&Asu!@q@hgRT04FyPzQLGFplV zXV?Y1bv|~*wl1u*I0CN2htpfomYp$S`4mUqbX=;n_h!X%kfmRgdc80KgqD@&Ga&9h znCwC>X&UZ2?+qr6$|xkvO*WOT9??$)tNSlI2vLJF`~&dpmhesCE4`k^_lOQf<63L8 zf!?l+$ELQ40s7&t#f)NhWPJfplfaVt$_b={=%h4ldKD5Zh;|Bgyqlm8N^%PTum z&?2EoQX=|I4U98YcX^kGFON%SP;&IChneFA^)czlwq8@b6;vH6SXNr(w9x|lWWsRPeb2eAcu(V$J7(^}yVjYN%Qj9aw` z%qE;9yhsI%8|~u{mIdy%T-$~^bHz=bPA}9E2qX`AKkR`v3p#FZs7|m2KoW&91`HOM zNMyNX-oE5as?;~enmxw?KO@SzpnYNfxV_ykCH#s<6_q# z_zZv~PnAm0GfU%RE#H;M%bG zdoBeUFa5wOQ4IKpeU2pB&sNn!UgT`@-pVG@2uImniM6tYNsD^XHpDS&4?ER2mSvbL zC-GoJdm3HDi5UuK3I=s3LYnt`5GciYrh^s(2W8r>m6ECF*zZ6F}0>tZQ{q=dOGRzu9ko!?c^QQ>Tw zP^k00@FaDcEUj_vN{9U1c3h^2#PEPh!uvxfLrv#aJXMJu^2XMeB`yVIhS$fKcst>j znJYy2({<0_(FD3QNKNuQvj&;0n#;JA-fUTvBwz{(TnV-R&KwM1(|LO-pQSlF)Rl3EoABIWA@I zMBYB==~hVDDxYAvv8Kq6n2`mTE9ZVvV>c|lp~JsE;8~g-f-)mFicBrGXA2p=({NUH zzz;ZZ!bRwAW+8CjR|9ZPLD!W-sj8e`_kNfnvq{FVHZ_&nTiJZVHfGO3K!V9XDPE1P zUfkEh`)D+|5u#97Guik)eq09pUS5iffo9nB!l{BtP~5fU!bJ;}&%I~=Z`GoKbyi2C z^2rLQifph2|0EG#GW0KjHdr5z3pG<41IEw0c{HMm;sqlvNIDIcCeEMY*8=bpM!E&D zI@FVmjfRQL&arxC@m`^LJvAYnSAl~MH?~VURqIKYInSpsykN#-T|1Rk?`aI4%)-iP z-;Zqwu&_p8sqr(wm@AX0|Ek51A%F&)isx(nU#Y+OCMnm-1_TJM2RK91-E2x)jj}b1 zQ`88>-9Sf}K}Q(hxQQxIBbD551a2}80gp~!0=}J*$@B5R$}r^=V65u17rJkH21D^1 zFYF4=Z7gwb66||6(8@}HCl$%*jRoXz`%<&q)`y5Z_3lLft%t7qD3Z!^A{^u zi+BANhhTtKb@M0C8RY@=AmaZ@y%(1%;=SVoBV3aGqf&ftB8T{2gm$`$k$i!Af!gd| zviq3bp5_8NK0alz9;6W98d3kRSz+JrA5fy|k3yYwzer9A=ule_W((sRzaZ599SZL{ zhUcsHpTUhBBJ=>Y0B`7{D01_2*@0ShB`Npha`q&3q-C8f9ease;>G?KwI7OE3_Ip- zD$BmTj_{q%Hojs!sWIsX09deSER}@-7W?HS07>a*;D8M2P}M<*BTE`mb@1V1z)s;P zpGg>XXa=-dI{vVgs8dGzCAl+{EbdEy<>=b>iW=m&UQFRECCpsDDmx6uh)rjY@Kx4! z|DOPt?=87pH+^zK2or<-3o8$3(PEC>4q45+4sk8hTo%fmUWb!9rWf1iR040;E3lY^ zjLtf)=nA-GQ01X$xWzRzEHG1+algXuRr{3W&LobYc;Kq4csO!K4YH2EVI()#dm=`+ zNc}F7k7T?>5bVWRfX}&j1&<->BvNmof7}+vw|Y4}snx;Q7c0i?njOmUewvs!T}Afz9|FrMKfV*kUocrV(e-J7FM*kB>Nehe zUVG_G^LZRy5Kgt--cK@5Oa~EOVGWp(tL+2d=_5yZG0gaKAX~fQ6DQK?J~RN&o~l=h zoFzAH$1lCiEN!fI`x8=3CjyF+7K4bB5wXNpBO8IOplssR)g%_4G9KN1B@_!&751gY zg-0r={NJ&rGy$~NF)5f|zY+9^2G^hAjT3-&F(ZL9%z)KlA@pj%Q6}Mf`pJznBi_LtG5b9Jn7& z1sN{rV9%avWTpN#-d}^!wm^>|z3dRDPLlp>k zm|$%yAOI|ayWD#$MSv&n1txjq@q=k*PVJybW$)$3)DNylBfTt6v?fHr#*nemNh?c5 zK?PmFbMjEtWJw-sQ1B3qP$ck_@=^WluafA&tP$o;Z0c=uxMP&1)JbTLgJ4P?IFDWC zCcAb3|9H7d=lN!Zj2kN14}vt>-~g%l%YbP}#Q5dZo+q<&Pv=9xz&W3JS>VH?Rw+^VmK@^CY^8dW9bDE;(OY;pXh+0U<)F)_ddZ9Fy3sD&${rA20$OE6z zil9+*p#dqYsYN`q#PSfE{7v#pIyi`SJMn9d1HY=lx%fvdylNkNZP)QJNWcPP9L|&YLnzIy9h_r**@Hm z45ZYQd)VBPlplkyay^*54BLLcHDjRHf$(Fmw<`r+{~B zi`@gU;8SLBN^+6g;j(N1W3&WE5-k*$^~`} z_yB3yG~A#UKKE+UvG`LX_kOpE|HLJ zwD8?`Qk*_46l)l9((U{LVVhV}@z~lH$x07TE0as?EBDw4smh7&=iEFpAyHsA zi*gvbJzgGK3SS~I4PKAny48`!S7AXhVxh3&Y^qIAgp@k$Vm6>Eg506=N%{}59$@_Br`;!m+P znLBKqfA&1XhN|ZQ%d11n$QFc<)pUrqMbIYV(V*eHM{42|#(a8xWB@7o`wcVib_K1+ zZxaQgJIp1>bGSCqq^_K6%0qitnZP0vK+~GB<(F{2q=&wAT@3QEm6E^h1ZB*eMjbuo$~H1?QMSkA!W>X z7Hry|PO@hAEOKA<&PgguWwAKTIkfFppTifDr>j1IEgKll8!{!*Qn9CIwDK%lst#2D z?vO6i#s$(gOZWMSo-4fzjHsR_P1k;Bf9=2IlR8uODM;t2*tzP@9;*%Efl&VuZ#4HA z!FqW^c_?dOJFZ&KgKn3a(ZpxY#CXy?8}>*9_(>zx!q8kgm_kH4fCjkqk@O@Sp}Hfp zrU!7Tu}YKpLIy8c!<509{g;_jq@l0KRI*#*am<6wAa)kIC@T#RZk|zLrNpf0;mj2V zTp0`Q6dzIOqAOx281ZizpHb9S8=bGD31_nDO%vlJz(#+Mc=$3mK$0l`m@Bm_KtkqD zh=5HOOg?CQtk#vm+^V^E`EOzfSu3q%ba65O{*A}_ClUtlN2So=nV~Z+UcGM$8TWGD zg@qEzZ6AE3(C>qO*3ce0XXA|upJGw5-Q=e-4d_QOwlJ%7e&=!at^rR;%<9!210bA^ z3G6s__iNmMW`maIN*1SoP6-IkWV}k@=ay!i7s8J*&Qxl#=HVbIROKgVc+jo-Vh>pN zP8IWY;(Ntq1t+<^=e%B2f{({W#4#{IA>V=Kx`++}vs=45YyNxlYNWrqpppwKn}J8ROz;eZ zSuteq(s<&sdRs5^S=V2QqQj81BV#EKhf6&E!~6!>*x)t}-b{9_Rz6+ag-Pz00)bTq zpGmxYg-UGqyXk~^B^*{)sDxPtl3X{7ZlBlpUb>8zhdM~BqixC8 z0x@pq_c)__|941`MENyf`YYYpjEw>>puZWQl)k;R{kUPq+(nB+dy}0Ficg3K8UfaG zW=kVTtrqTl81J*vR97M~cbzJ+$-X$W96f{;-g`?CV*CD{g7 zM>ss?^)agfsYy!CI_sc!qw%-l3y~?5@gcfys&faFZ2N^Wf`D{3KDtUr(0V3tI2~A` z3F6MJz#OPABb0L{+2!Gk2Beo;-{)Wk~{d2pI?P%Zot^tVTP12jlKQ zs}TisPgY9uWg9;r_Z_XV0JDe|DMP(~A}g?^Q~AteU60BbtM7OAXl7Gg`lmgu*LY*> zB(}!2_v>KE_hHPn;gBXO6=!-nQ~h4bnSyzpeLKfcb?xP7M5KAk{91!kFD!$(o^kXc z0_m3lXs6sg4pjXuJr61O;ntS%OOm6kwtdKcq4~;j1rt*S$7e(Q0608vnL!h(=6qR! zj=+4c-}{r_v3YYPvfoRpUg)x3gvBrPQh|-;^>(~9(U;>^jr5~fa;Ez3pJzoA4 z*su%}mwRcL$o1g=B*O3|(z-Ga5DH7kFjjqCX|1lD2Uj>hw79luKG7_Heq~o|>&EQO zZ#vHJS|1Js%9~D55rtWo%CV7=1MUw9gyQcs-ErK)(yl?1ZYgtiyfHmpj<(z9hfycl9^qVYu8> zX^7-8t8>lHiNmB*c4E!v+DSo%@7J-F6+g{Dw~Z?0Q;HCE<_6`bvzUWQjG99X{EJcd ziF({$J5!d-_2m^C`cd0|p5i?_*q{^0;qvXZ@z(vtT(@I9!z>Wb@*IAW@u{@`Iq$$) zG1^!pIW(n{hpf-I6y0`^M6ZrFIAL9^Cwj&8^*0ZXuz0B3(Sx0^ceXb1@88==)a}%- zM{$qec-F1gkp(7TMOm zB}@9;n3xQq(85uBbs26F7+?KHrl-a?+L&V~|&UXx9a%T?RrnQ_zftl)Fw`stR))<8MOzL1n=mR|vA>^%3vpRZ}snI2mQ4l1vC$W_W~jA`JTN z#DCJi$*0fX@>?N7>vY%PDq5LoQ2U?6KtUaPO3zb-oV@pc3qf)xs})aLg6H4|bSoR| zR+m)g&{6#;pel^u&-X>+SAv4<&nI1esD;}A$X?p?cpbP=X7DBcm698h;7eG20!Uv; z!Cbj@tGL#`Lp+FecKdWYHSlu>uL;-}UFT#t;l9Y#4ak?+A3p}_1HXbH`2P;ffiMvM z<@T9RFKiX<9(l4(L?eYNPmBli0u=RX0-TRt&A0FOmi)t3gvpCXdo#BbT|526=Yy`X z#Ccm^=v#8wvS}&v7z#CY;nPAsJ<$$Mm@ku?4!3MrBea!Go*QXcoaY)Ub6ku}E{=-Z zxZBje5Oa-$X-fn4>>PVx_{gRu$@B2v^;LK$G5IHn2fY4-1dzUp-PBkwml$?kdsD8I zC9fiuKm7T3+ConYWmphz&L>z59XkmcB2TSlql-1g$OU?p>mAj4AkAV^AHA>1VM<7T zXK~Fhy}{W29yr;Jb+&VTen1r0un`saEugR)2~_R#$C2qHzGP060$bplIy0;G}sXJ_e9JR>{gO+n6907$@p2>*M5b6Qm z>a({(S|pBu3*d;mK#&?caX(y+3Ln?)DTAS~Tswk1Wr;?MpHm#NQ_Vkf*^(J`n@pC0 zwa8#mGI@NwHq5IAxKzWM1-sl;4&*4qeVZk}QvP3m_*Uy3dCT}P+8ulPqSdFY=>cuKo`_rp-T_B?mq?g@QjKxvH}5}Up)NP77dv3-+$=#_ zFm^XbOB!6o&h+KAi*cHV&QT(rq~HadZ%$WI*>!LeMV5}fOC5`bx)bN0U7pX`lYFNt z|2HX9j;Y4^+Y4YF!n810lRd6XK2yZ3QO+K|*MQm-z_K#&0lQ07kgMJAAw+{?L21Cs z!29{F#_+9E@KI%@dW_gU;IOV#?jLMe$E;0Jl+|%CNYM;Y-;+C%oQ5&8%uK*J;bXh(ci(~;UK7?#cBct(lO;@cI=ekCXHd%2geD4 z*#3w+`kQrBk_3RD_W3>)bSVG{gfF0%KU2rjUAxPM$iO>J7!cyl`on9&j8k#!D?l2FD8o-sL%r&{IqO^lg-?8OJ575ZZT@7eoY%{C_0@Q->2 z4vrhth;KQvfq!goz`kiIRCKzFp*o3-V@F%zj^QTU+#bo ztC&&aCb;yuWAnVZGl+@IatjNn)=dq=XRw_NrMoIom?Rv>*poLO=@n>Lk9S3YH7cr8 z*=Kl#&@f!8s(pR9yB#p=j^MLq7FiWK!!#5sTdpa?Z-as01LoDxwhx^mIc|PBjD(QP zaQIzLFMgbgS|JM|0ZOQhxL4V*9Ji>as&u;U;Q9a|UI^;ibH;wgp@Arna^6N<4o?REyvz35FGsVrv8yAC#?GGzt{Draip1->d3p&pZeALuibr@#xUlcus{9OhpgQ; ztC?z1y*=3brs?aYWtb0+Uqb~qxBaeae7Dd6i}UToPUSNcH*x>ZQ&W# z_$ZL;yxKqT00AlBA|wrPPdJvGir^;2AxMH2vsS7fZ{&L{!hNQyCQ{n)(XV; zV1~I^F2ALcP-`>UO~>>v7MlnCP@J5sHPN?oaASdhl$(>DKjX%KsMy_i-5q9V z0Apa>UsI5_%LOHbX+@_(sZrT4cY@i7`WJKC^2R2G?f5@@m?<`-%x8vAIS$juZ4WC) zQ=xKuK?6vS7{zFg^uQCjMnDejwv)x^J^9dz|J|r7^C@RSqq~t;W#8g`PM0GNSd`SA z8Vt(nD}csLXFt7Nec{w$BgKf3*%wEsov7m{@c2YR8;(!7^N2I70m45r(fXu5TwK7{ zVXlTDJHF2zC0#OCmRY~4YEDG0i>$3?N|*t5dmq0Rt%o3aA<|sEQ1|MnHRmQQ3Mb?* z+x>5&!Y-UB`0HI27NXYit18C@dspHJ(SjTvTao+U7pTC0&=Y$^4HwbS$_WXs&Cx?* zF)efEUL`Eh>WKN1cf({p2x$_V<4X9}qB+|ghs3~7K6c@&(pp>>V{Of0=Kuiwd0LSB z`^f?XO6IZT7b_|RL++G24k^eOCiKP=YEa0V-Y$jxLhPZ{I@;I;f#Ly&?I(?nH1W~6 z5%E2j*sk&M_P{9FMD1RUds5rSAJ|DsBWmwL(W&tesh>k_U|&EE0J_IzgJOQYa@Pc9 z3*OF_*#ndQHrBO&pDL|FVJO4KUHm%y(jvS`dJeQL%9dOuU4UhMMu0V})&f?J07O8$ zzu=_e!+-EufOssAd$%b4m5T>B$h(AvU~sgzw}e?(#tyZD%JG=b7o~Wu834%;`|K;rTfiZVwtvFrE@jqiPKbOifGM5;v9A2+9A3pKrz zM=m~h_wrWO%>4=!VaGy>vp-P%zktaovsJS@K{BQC*HJw1D#5dPPERaA(Pf!26r8m# zn{J7+P+=65^D~~Agk4kY2ran9TbP(_E#f6n&3Pq$32)t-{WTRT+U3h!#yZE8HgMS#FfgGOwzTZ_I=Ka}{jJK7i*IOy z8|?3j@f<2kwWPmB@0q>v^X5y3^HVr)m*6K--YG5(>S@jpw$Ky0arP9{$H_-k)=efT z>WOq$E1&;vJR%j`?s*mEIiX;-}>@f;=iFRhFWTM}bk8_j@273%zBjt9>7f?R4R z30Le4;NtK~l%~TjA}iOyXl{!OAXv#OSmR7|ENRX$x}`ciE)CHylPUNIZ!P$b{xqFx zQu>(g$Lpj;UZ4!|;bd(l_ft-gR^0JT0Ao<|D*2`sZ!@M*JcOq)OncVx z*6?d+i}m35E-*6cfkC(h>t{45NL(}%rYtu_4h`$cKCt#uy5ze^g04H zpCWSe>BMIer&FFLC^h4>ucYS_iM4qGT6{->FX`^GI5Q^gY2C9$TcB? z`_{SGOmPMP*;`fZ8i2&n-=Fy4A0<_z{W>MLy~V>EVXCd*88ASqwrlD~?UL#clI z|34$|12C(aw$yM54Jvsm*#7C=j@2CJ3y*BJ7keekMK8haE9v1U!{$L(IH0>R$u@6; z$86cA4nju2jdpgR8*+iFFE)Fnx?SR;R!$^J!RD(qaGte~1fjKwG#*;_@{Xp(6|EDd ztwys$!3d4XH@>$4vXPFj^mHT7)YzW3_*E3vzs5ikLF=qv!6~wD=fz%IEFlGpx`9`; z0W~XGeu-8uWHV@LSE%^Gp0kT}gtCEw+HcI)vl+hV;`l<9H6l;Ao}O~^g{S_v>hmZ4 zWhhoRA1iFr`ybT2a8gA0XoAW1!Hwj6pFov;t&K7sO9%l~O=eds=(G$h>v7=( z))E0;2suh1n83Px)-hhfM-G@ajK@s;KCF@dplYvp8j;02=j_)|zcmpFJ#)|W=&W|= z4RrnMvYE#q3J79PrCy(2UAgWJ1rSV)0*Icln?T|oXQ?lmc4DgA!T&>8`(-C z|MgZ5RsqE0+}7!g%~gNJj;q>$BQ3VQ)xDQa=Fy`(p^hD}Drp#LuH1m*zW)ne2#lh5 zjT+T?5$|3e0)sM;K#NG;u3kN}Q7ze6>rAb@9RB{=677Xh2dp&O@iwzPHONUR@YvmN zYNTdIqo@W`m`pQD{BshxJ`3y16pO`c84N8nB-8sD8}i_%=6uh0>#%#qRftwaR_hE; z-^GcQ2hmD0ugQ?-qp91JI2cvFND4*Y-*YidQq+4;jl3sYW~)QQ1wLO6E}HAvxM`*1 z8L0FlxUoyUd9lNcGxkX&F8)}CS*wuHNxAMtAGY&AY3mO~LH(&)&>OqNIZ`v71RxW( zUX@(|MkXSVeF;|FQi#F|B@_8jr37W5r%PmwI#t4Zh@XU*31M`0M%9!I%2R_#h~m^9 zMl@Af!P5P+&*-LQX|2(XW_vR=^U!xh*KA$pjEGdTg2WwCUAz)$gvPkIscN%FeJaG% zl)X1*U6J_R;5%^8a;79U~#&a-pbZhOw1XI+vz+()hEJy~0tQ zTt4Wpt~Gq#hKT_?$=rzkeX=<(ZM;&5hnhv_b4015gFL5el7Hz@2kE3$!VWR^Ia{%Y z!2i?>$O4aV2y*sayQ_>De$(DUOaY&%7gcSh9?c7rfcr zoG%uXBS7p4&^#h zE}F6?fAvBRfU!@X;#9R>da)WTW9hn7J>2m9B(Bn z+yxf7p~t!MS4Q+YTU*GI9cJe_sA9jpvTcDps;$PZDQlR+We%srQuLbuP-DSUOGO zw9bJRSi3ah!%`5ERX;Q z<4yf7UmX7|f?6nW3EsWlhl)cyP!(Gl$5fy)Bhe}`V0APskxHZhdkRpFE?%bv}zEPWF zAA^51VVE^E8K=4o=qx3=dU`(Cbth5?r%?`+v1LGUQ}%mj2l%_VIFCh~^*=G7;rP8| zQ6O?Qetc#}s(c4u6^EhlJVK#~PCo}{CS$LJlN1n8U5uRoW6Rn}n=b8Utq4M87x|L_ z_9BP*HrQRCQr8A6J-B@yd{y9)JwXJBM@UQ7Ioddx-$G!t_nj>9haa&}QMh(l>&>h6%kky&~koCnh# zHr_>YBUL869oGwu#deavM>eUWpG1gIGi#`n$B?COsg@RcKy|6li`?T1trFe{%my$f zE*J``q?;w?K>OF$Xm&*sI{jVCKws}Ui-vP+2BZ$lLWxbKTfrCyheniUqm+3!dqXr@ zK$e?BXX6s|JfDGEXl=}>nu8%UzBx8D;i;BO9l3*R>#KvRg*(vDb$yhyNjN#=i6b&< z4?J0ptaTX&J)PIt;Sxfwf`&kc<9Y8>yD*Rbz_Dc%K5SVH0@8Fv=@${PHl^qzEdw_L z2Tq%wGtPHCQB*0VZm>~mzd90E_yocxH~iMd#uz3|WTD_>7~|_{2lk9`JfQ>B%Hc-% zG5RC5uX?+JLrUt_zjnZ*kTIZpCl~oJwDU>)iB7 z40fgaMks8tQJO3#on~GDoDGho4J`Fz%I04rvojV_bLt;vc0r@GaYRi{K9XTLh*DHE`5g<#^SCaG=!h*{&riMI?{k((&)aoD(B92J5^l{9 zT&>^m=5gfwvJB)04^Lp&FA!|ycRU8@m6 zK-y86l8zzzkorJ*6lj1lc`?94b-3*-siG!43s)rI!r6{Xp7@b}>(~rV??ym@Sg(k$ zF#j;*8oEG7mzr%QP}zfIa5@zyeeD&c93-zC7hs=+CfLC8blWo!JD*05$KwZwiy0z8$?yu&lE#zO5IBc79= zEEc=7%g)!vlTU`bvFV|?jL(mHT8*YRcyh<1%OJDRwm6)Cq1_h2aK-c}xQN#=WAt#C z9&0EJxHuD4y9{Q!XpIO1Swe@cTZdODMZgSnr%QT%P9PsgweOP^qX29e^4rcTXGuD3 z?fUj1IOC03G`xYG($N1jBA1Cg2D6UVqF)m8lZ+9kxdMoDw<^0PEAY{d&F(w7(4Fs6 zKY+V&iApZB`UCz$mE=`o3RRkvCu$84V0`u0+>?YGE~el0cXTa9wMC(a4AEzUl(mAH zu|)%6VrdLnIdkaCQoW&|vh~GB8Lcp^VOfiN`ptZzIgizj^6O|as5kn^&~2S3369= z&^?1NqIX&%`n-?icerSaX>;kL*=wqD+F6L z5*%KeY-Hga$t|x+OQNfuMckW18u{#3212fV6qSjRuk_4m2~kwF!=ZzBF{2=)uW!|*1qOz zLHk$svl&hP9Zp^GcCs9{$Xn}&u1y%7oLlBl>XFZ^)%^zEaM^B${3)d#^-4bQEv5{B zZ*%iy4g(E+aZ0i!LF3|Xn1@}HjHx`M*Y!9=@C;m-K4dny0Rbtti?S1<{M$_3TG3jf zx6!7ooffIX@?#9!4Z^l9cs-P867~JFH&?-@anXIXrqv>Xie;a?6soPn!t#>0>xoYvH4*2XawlS2Bs_4V9ji+6 zn`oCIA8cY~YE-9KwAZ+|l30~b8(PXXg3K2Fvj%X1STLMwT9agbeqalv^DNzIo(l*+ zrY5;+zk@^94vju-4_y`091%$A<0nvX%p5k4CIiqPgWnKVgs36;AAnz8b!M2Q(=}=; zPHE=wNY#s1hBfNw(bU+y*S0Blu(SN*Ovg~kO)CvqxtO&A18eo9kYA=*Xieey2!&r@$)%h=K za(BVss4D|JLiaf6d4DJ0CQW2L0aglpA0eXjkI+jJE;3 zt?ANJ8*gyKFvJIcQKUwCy^v{;He@(r8OGqXakN12E+P4&L`q*xvD# zxmKpdx`!p0n-=+c4yya$!ReE+YId8_cka81lMiPvq+TP+x)kA=WW`VFDu`9P!H+eVks5^T9&lBRa7G{y!idXkvdqt&kTfR0196nlDC8+X}@4}UDcj5Z3y4lS&x{5k+@LIb1-z*R8h)Zwd- zfYRxy-!6ybpD(Y#&^x|Ff94yPzOLfa?0D9X=|Ogdcslwij{VlO3B8P8SVR1QV+nBz zog1bn6qRSI{68l4-Q(6S;k?+aT^V}Mbb9R!!uO>`vYX>Fh0bfZL~an`TGK7TDV06XRy#H+!5ju=fXBhba=i#-#%@ zX+e6S>GkQF7yyFL-Xj3f%lg?_$v8fbi7a(Hjz?!Yvl;O=V1c;lqRR60X}gBn3W1{r zv*k<{QiYqAnCDM;pC+73&n~^X408%cg%ABQW?1TLS(RDyZ!rRLd-0-PDa0edW=5if z482oJmFY6@4--jGsWvD-_SX><&)@pSnwD)OWb2b32q*a=#Wvw1mEGhaG) zM6lbOHhVap?9+}iu&x(L7J0Ds%4@^UrPVXUdV#H=)TVx!s9)Oi*M3V(5Cm)}#XHEg2p zi}$WxiTqERqDl* zrX^Pcpk^>cibYpLN22XH)U*D2kNSZLvh*HFKxLx7Q}wnQyNx8#S(d`L%wCrJl&?bg zWoyNF8+=+X$Zrgngv%0MT+)GU*ob`;S zd_SVYxbeX}%V>0Fbx@`oc*?G9YCpLgoJX1Vim%Sb#LKEDOy;ikIsIa~ zs<9}IFaAvhiNvnA_Z!WSW-Mt8tZOKQapw(UI2P?rr0!e%@#YFlV^N z+H&{D(~((ly%#RSIVCo&LnK7U&O5Q-NI$tf3lg+38WBu%#HJ7f+V@1uGOC2M zXQIBAJvqsSP(|_?E9J|}(bLwA0J?b50jAvvv8F6Izy}>7vNtMm<5Ul}sEyH=SYpHF zhwYL&9>)T~W}ng!;n=j_3EX@0N0e6}d%I2Tk*+{7f@_?-t6BI`qD;|(0dOrGaz{q4 z^l~<78edwwM`#{`4hZzl3Mx7rCoA7C=>n+`Qm$O7 zwJ7WfFP1I=u(Xf=#AzwV=F<5th*eZA^&Z*6EDwB=VSVMJ@zw^<7l=w4`<-xjBX^5AJ;LvuA|Ax%{onZp$2^#i=LJqHp-gCfcNbz+l0XS{=TX z+33KVEAu`fwIFXTj{0x&4lj~BJt>~QFOtPYo5&Y8#OQXwz95u=vt|6}rdJ(cOm-ry z!%Fq@W5$#T3}s;VY?XU$fu@R!?muhvKvhgRKOI^~R<+h*>XB%;%F@DVV-SFDEkjis z_{{ZWhhQAQP3@0G<6`WlW%iXIqYg0@56oG5JQ~0PP;PZcHS9?s!0+rHAKb-u5KTzs zg)3*$GqUr*OKlOo#OGSa(1^EB4tcBeqr0UI^vy&u#N9Li+-+k=f3^bUfpb~)P zWd11oV%ez`ew~^NG?v-UZ+#7Ck0K+~AE4|H&+RcHcr&o<;u4&TW_6QtOZK=Y# z#?&TyYjZXQ@BNolP>g-pPO&=;+d8CIPTz-05A|9rI{k1;-Ckk)xu-H>I4a-(`HuaE zl<{>L!Y06P&6?QQLJFnqVBTOt?C)7Gyh3X76$8Zx=r@4dJ~Q7y&zzRI;SsmjCNya^3ES9IBd4ziC2%L0fv*wemMo@M2=YfI zJwSkxGT(a!cfwdX3mBNo(b#4xZHx)V#mAeIhGUeBX`z(g9l=l8SrU98X+w2Bks71!6W3o=oj6_FpJ}-LG*Et zO!dekll?zbI_(hYo0M5hu_JM*SzGp2#Hkq;ctysRE0Xz;>uG9g7r{n81rV781b}_~2*kd_lE<4pK%g)9WfD<)> z$>YFq7U{9W8kW07Y8fn7uYYHteoe73Ttf5ebO^vLeqP4KVY|n6=i0{xJgq$K*}}N{ z()k3|u09djoH-zQu^+TP>7)#ISyDV6f#1R!rNyZqr=|ul3^lbmL(W$eEfR$54zn+( z98WR3)+!&x`X1--v0zQ$u<0j)Xn8PM+5+Jgc&Q5RyQmJK9A;B|YyE-YMr&~6^MWhn zah=gOfqea{fvH>?xWHMbS9h63iM=Ssn=1I2NyU;;Gza(eKa3jK$EB;tbFC>N3j5ll zLChy1j7xfUUm! zm1&hC%$HzUSIQSOx%P z1}Ea@2%Q1W1u>2a;fr|AJV1qHL!Xl+&1W$Jw!{`GFULt}>&8D!n>j^?V?^h0%Mj*n z%>^J_oNIJX<$kUr-6Oau$cDC$>5Qvdz+d@nYwyV(86BuGMDu^%b#^hYr(&*WXG!iw zAq!q_q4uV{@+Q58CO5WCH!5J>)(4q-H$D1`O7LJ6c4t}4aonXx-ScciN(|n90H}%m z-;-+#5V1`iRN;Oo7*i->C0>K03XSo%TkW5&Bey(3w6HKk@EvSlr7&I`>)gMOQA=)} zrk^&kp0cHM!Wy7szOVh7P}5b`dmMFgrB3aam}#6d(qfaUULQ_^@~@1@EZTAe$!8*h zLvGGEumD4EAc4pucAL&OK1x^$FBTIUoqnl~Jt&VxoyR2mF}Qh7Hnx%uoOL5-Y-Hrs>?1VR;zL(os4 zTE{3B%joj@j&(3^Qj&Y#%2s!BMoRzcWD>&TsTvI~9yZ_>Lv zJp81|i>Bun)7wL8>LbUUV^fHPs}1G)arKiK#+wYAlw7wvKK`5@)6ctQ0)4;HRW%vjR|_Hn>t`(&6e)@o>iuhTZ}|)+EzmAT$JbCwfdS_C z_(EXhi^lvsX0Hyoa2i3+cLFw-QUR1XORRS-3y3h?8Ek1YSC~cn&hJxt@56ldl??>}zzK@IsFnS!i`Iq3Q5d-o8 z!M04Yw^7fo!1=NMtgfCn^U3gl$z8pMgvcoCFk>)Pj2Q9hbIG}=LF#yjtGY6vH>Qh^ z)&nWn%X}ON7&pVBr={jE&?@#DlOw{~tROrn($|1)6lNfS{yRyQ9BozU7@u`@BkGg7 z1Oj)ka9x;eL$~Ql0&JZ3M^}oWc5I@#ZJz+I&XODp25CHx?>$_hp3j)WZ5W(hz86$N z#`Iyp4xsP)X;q(bOcIGKc6TPujgH9L5A8XXsk)QcWnhsbL_xS1z}Oqz+9A5g+4QIw zRV9kMpgAstB49^XIv>Jd)27l}u?GrKU%-4hp{gBUkqwyrcfbpham9)w$BL`uVC;W* z*bw$R<{-q>p=f2?q`3m#rso?3=-UCw8cTl3}!mA2)R~X5~cl?lsfq zx5boTr;)86P1fOtnX8hG&c(h?Hcei)HZ#b>yKk8KJq4Glv|nYPB8Y+WOvmcHuv!Go z?4;qf7Is0HoCE9ZK=E2x6Ap+nrrD&#Sj1`vAocL;{1#Q6nsS$x&$)emV)@XwSu=|$ zR7p$fehW7S0;Yv}3>Yi#3}Ace*J%3}dUt~MV?1G&k2+`V<<4nIS30=m?8VsqgRu(8 zf`Ot-#sl>u1|(deSp?2Y*sO3JoE#HwWRX9-|BE3N`sqpb#~P?hGR==^eSTE_t$T!% zrle@cv-4xhbXO#LEJ>;a1m(er30ukQR7ZcDHntqk&b-?OTVX=ws{c=>EWEP-9U}Ae zG@+^?M6n}^MHUgD3P*mbY(Dnt!(<5F76y0rp31<$nJYNbhu=SE4O|chP}4+~PovTM zJVX~JiUEoWD(pTYV%vCv!C{Gc9)S^7V>sc$me+qSI*LGx_1N%I7r%`Krrr z=s=Mld2DfC+~WR!Wc_oAJVx92z)rMZ{9r=0Fiq8m6oLx1`t|0R3| zqSMFLNBN|KKuB0g-nI4#0=FNq-{nb>-7OTTekc8yGX_(CuCTQcKrhp{WUfK=>gP${cz)Z%%Z zT08HfP0#M#4CZ@%ShF}Z=7dZvQ8v_qoQA>A@5m*3O&5J!v8or=PBx@6g2lTwOaKO^ zQXgh7%gD0RDB)`gMuZo9coOuAnVcRnuv2YbdA!hl5yLA$#c4~s2>&WL2zvM`f-`W6 zi7eEY=UoNh-x&2F4SnLR**4L^x^4Gm^d;~NS0)E^taOY&IM;oP*T}m-cLV1q)eTP^ zics{#adqg&lniu@Ag9bJv>f?PpgZ@?s=Or|?)zjC`01QEt37naQKG~hxV5MVqY85q z3*;cWXkVx@WfJwAlMPfK$2{@#UHo-Lm>LxS`7xi&^s*6Th-`sd%c=3s9e^P~8AGtI zJnfZ@irSffHoFJgkMPVc^yBzciC|OPUmDT^8zR`xLm_1XnmB@uKfON?qOC?57}?gO z$sNY3GF07pNu_gvDfXG_fi8M=d$7v9OwqS%ObG${Oz>x!JX`-Fx}0~kR1k{VDL(t` z8VR_j#Bpnm1A+qKmvv}_d(QoK7^Uu-j96n!{%OWQ6|d> z)-jMoNxAuPw8KOH(igB&`6lP&%$5KKNCe))%_F8CTS~92--e1Y^MkA0LMrjpslO&m z!;_STadLd)TZJdQ)Vo;%Qjvgo%Og&z7opn~0{r3Kq@@#E!K3yVJL%F&=GQ(ZA20{6 z1Y^+P4S6xhMPOnBUHFf06DRvd=LA7t5Fuz-gHmLyRf3Xunhd&HAx9vnBi!!Aq$bHe zKpQ`WvVFy5=mw>_F))wf+RozL2KAi`h2Qx_Y`YcPrUENtKNf5KB&N4)6^Qfg$jF2W zppO$%ldrDAaMcSK<@#e;5bUf&R~XxWky#}ypgCvjY{*SSI#q_7i0fw`KN|1kE_6>M z;P61_nY-actH-ua)xW3tP9`+GtODa`+&c5ucL}tdibTW;TyCa#jeMeOAN?z|S zM_;kR4nd@xm>=g?{j!Mu9Rr8*9AklVMC~=z7yU*G>Hv>* zd-go*uwI2e2My8oXhK{(tkllv6ZgV;Qt@dkCP{0xqf|uQM}srGhm7CS4Zz;I1W}9k zF4maeB91fbsi|0$l|QExfdM#fwVehcgRX|w=85zHt&V1)%C~>im8<_!Zav5+zS0^A^Hfi)2fQ;UA*|P+Ne%V-BPn6;aY=1x{yGDBB_wle>H54g z*$Fxl)CK#PZ(&K)@^wd2+8F2GJhzw;IGWD@fcFa>hyZTQb4wsQpFwdlT}Ysbu-@|u zE47|GNL6So`6|dzFuSPQGp^I2r0BQqCjCOG!N6kVtW7*XPh9`Omh5yv|2EnFc~zLH zKy}A(0BVm0S=2X|zze-6D$t0)s(pwgZLU}%$SS2YC%1)%qvj9yT!U$f@jciC?L0tF zVhcO{(-iRtPSbrsRs3xEcs6kwHhe|!ITn!96H>B!v>Vbkr3tF%S#E?2^?zH}yt*hc z*jMF?S=-73(?qv;Y~Wv?6xn0YZ3T0ssH~x%&fMWy-E7!UJKFB&lXO@ocOshf4g_WMJ4o8CLR zo*57`k0H3zO}9pOsL~Q8CZBn@fr{|3sU3?L%{^I;{x@tQBl~CGV+m+LgCC*{c+1r! zJVsxVZn*mHl{#2}Re(QrZBVC*iGFdqXe@;%?K}cK6Lz9}@|@|{n|eFi?D7QXgt_OKADayWenj~%FiVB# zn_P5BG(~Rn+X)qGVb{i(v<53-Vhrvhe^c2YhS&Lms^Nw-VA(wR%x>PBXP)B&HF(ec zC)31*JGH{CEhaMb2B1TlYnDXTI}OCwguY>VZ*P7B7f%VZ*RPyf=4fov=-N9yh_{(g z&Vhwo>Yp_iTNd++ki0c~C4JZh$GlM*+B-Lap@*%Vv?~O&KQ(pR=K8l`9UU!6wU^** zzbWl&=Pu1V2_`YTptZL+%r>;7C5R9R5{vLY%CiHf&y}gMsIs7<5y_ZAyBCk2rxgmQ z00{ELQ&x`$FrAA@H4PuDXo>HPRnfB7{}%{OT1_0T9>4Ki(FyNTxgo|Ls$Tj^yx2XS zPzAdZSGIm+n+b?r%`8U+^CvoHp?nUdQmy_EqpCGWIDu2$cr-C97v^9nW!7@(%L%kf zpT}Kg@o=TR7h~OiS)Kl%%~F(PS!;z4yzKDui45wN^wem5fS|<%=N9@R z7#>jvkkBd*`_kC%6X6TIQC}{wet7C7)y$MQ6ZNqxjr|6fY<%Md)@)-8u^R`f#8AfB8K{SI%Y9PS-VTz2xK;}M}NcZMJ}j9%!> zRj}EN`+)*DCN16hEoVeO@!yjlibQF81#09jYjwZ?5_SO=xahaVQzn&jeJk3J)5U(su%=?xE@@LlNwGZ8lo z{7l|8`vftX(2W%GbF=xhy(1$>CpsiqgC!9HF@-T1n@rzi^jp@3jd5+F3jn)_)RG9& z9+k1q^m%0V3Tt-i_eHTyFU|xg3XgmVyemY*Wu z+`TXvsrF%!4WZLNtqUjp$n9^}+com*BxS;cFl(09P!%el z7RiK}`LJD9G)=TlCz_eL|*%sglBW_$e3SF>376~_8Uun zbeT48xmh5yz>foYOsTOQTK+vu>LTp1Fi9_%i5Xssp05w>veExZ{EPeX0hAKZ=SpPz#*|AFy$~<{67J-DrkFJ9-DK}MftqUR z_A=|kWQ#ADO5|@isy^F}B<*=-AZCkR(j%nUUv>Q+Ub&G@YXK#piJx4$C5Jg?Go$I2 zUg9_cOZWE>8U$+9^T+H(?=RtcbMwD;2Cn8NV9*yY-=QQ-vJ4V#PNK*YYpPxbA2IcX)1GL?Q3B(IyMDFUBYM*kM%C!Fe93d zt`z8ogUx z;XJd}fDos($W!8VM8Cvi3!I~%cFJ@sPIe=}XB=CCAAL_Tpy_@Bh0?!*$3p7V-bvX z5>}B)QTZePV4xwyK~0pXh5t}9?ux6?x84AoVY{#vhMA_q^Wyp^x*_3XqA^adyiy+VEyw9RhJQ; zP5%fG;j*XF{#}=<7?;uwdL#Vmfyf3>)Wo;IsB;xqOyT0SQvSEB3x%2bqny>Dl!IXiGzS%T$JvA?I-Bi?$2mC`Q$iyPu{fxikIZW)OWT2bjf$ z5;1+v|KS$(T9gAGw}7RM%I$PfC1$}|UD+EXo_D@bKD0~{x`xq7TK5c{wL)c&4H_#6 z>!&G<247C6muIP8u%0gQX!|kv}AB+>;-zq!cUZ)Dy z0zWyhia>A}d0#-7Z*#@?w=Oa_HYu+bkG6pcC%1aLBh4v#>3lF-xvlf(%T;tw70UGc z=mT|>U|HpF*fAN9Peb-v(%23qPOl$;Sy6)C5Wkcb%DH5Ni$sl0^4RQxNL3G`kn?`( zBJYC>p!H|$4T``5!lYc`^RYP0-NbQoju1p~IO=JW!oK<3Ly#7C_hiw5{e*a(p@YWN zI7zlOTXtZ020DetA}y#~bUt?VQ0{v`Z09%S3Ao&iG{h|A8h9ygAn>>g;ly|5T78z= zRV%YAZ?1rv?Wtu>kbvty3znAw)@@?wZRGS7|3@+ba)(G`cE zL(Oh>5K#IkNmI)e@Hf~F`rj$L42#64!VHLbE$10U2x5&eMJ-+1}G>n}W5^v&CZ^;f|AZgPf${@R$iuYu!Ta#ZU@w`haDW&0{ zP3{Q&3?&$ZONk{kDdGbjh&okDqOI7QfmsjaUlNdQ4RMBwAqHwgiy$<0@~&wRFVKiJ zVhd640PV$=K30MZ(UM%n!iuT7mTB8B5RhrLeG83)Dwa#mJOw$ zyaz!3bRQP$r z03}BC5@8bup3=Q%33c1xDX=TxMg&b8+3O;E^}5s+50>(@DERUb;R0Y&CAvfBTMNP| zbWeUb2Y0OkKyb>>3@Z3$Cg3H@lh6)-#Va;_E;YL10z7{xAz*ZA8JX1M=zNy(q(CJaRe-BY%UCMN0dhLp{tFW+Y2LxWE$+Arm)Ann5EsxgTCXeLRt< zAQt?&mg~j#ltk=22g5`SO$IujiK&2kXidfA2%Bv1(HwRaiAP=k5Uj6gj0t#5AI5BE zk`8c@J`!*@Wh}>r`Q=01L0hlicorDsvu;T7-|;T}+KDmSV_bCLe$85R(mZnAdbvfl zEmS&&Agfkgx|etA#gYBWFOdmXKcQt;oUa0WHZ?s7$c1(#g0T%`jfO+erjti`oSWpTHDrIV>DYVQ z(*0cO3Rnjo32TVMaIRZ2M?cwBXvf^|%2#pNLhWYh_S) z@01`laAKXm$+?imfnGvtEb&#dr0JfAgD&V1{W%HiVI*1GXWjCv*W+BPBPeK@=jo1n z+lCr*W8ZkCm!p45RLemGYWklivHy-P3Y2LcJ4SAg?@>Kw{rQHSn=v!QyTqc1DUOb*8Vct&``1vGh~Iczq|sYoxz8Ki=F)KV za4xAtAe|SBjK_@5MAwd(;qxdZGzXp6l4tTWF19|_{!&}<)-o-%K-udZwClGz9Y(Ik%_#!vilMGK%R(BX zgkomWhs+F;aW^m+mpDGboRZRRjsXn~^70460$e!yC*2C5s5xXXEOrk8B)n(#zu_&O zKi`moRu#tmJ8v~3ZdaOg!vdHQlp#?oPzB!|=M285mDFVn3dzW0=T!e+^pNS2b0;eUtEqrg<=L1&5}-B554p$%ER3MqBMpR=4>Uv-h4{^Zx?Vu=le48 z-BFNjDx3=FCEWn)K#RN0EwAeqjs8&?%$xf%ZS1C8oDcF-bw!rVh2ZMDtK6$7Iduh* z#}2W2b-EBcuQ}@cU&pVBHp@uTSbCl(E-0fTc+1i8u1Od)3rcC+Vgl-Kj%K4lmn*dV zWs+uyPrHPhe^m(@;XU@{otMb@GTR5IaP|EO2a&T)bLF%QF&~+k=J(gsBgx0uT1n_7 zM{GJ%am=_Q+1q6+pmBuyI$Yv?Pon?PeuuCkdqrpd+&GU^;=hZNx1^M(5Mv3zb;UTA z#9Eu6g^9=oCD8U2sSV~UxzuJZqZoDe-)HYXvi-J`DpDf=mw68OJEQguxL7UVxyl`@rglp((=5MtSyFJ^Xj5$U0A4Knvg;!l)&k&Eh9 zf1G3E*`apIyknBva{y`9x2P>~%5&oiZ+3el>oQpof_y5m@_2A3_<)My*5|>zC2&?i zYmg%a^u+@*ZNTkr621k+N+c0hNW0oiL^Q5EjyvNbSn+2fwWwi4UaCRE*1rhl08(3T zKA6;-jhTa!@8=W&jyMyn)=BW0IZDh8ELi*M}wDU3rv;!Tkc5IadC@Ww=4Uw4Xto(obPezWAG|6z#CeS8?CUi$C1)S z^ET{0#}MCsp(S}xJI;rb;cHZHx3m4c$5IP3@9bKN!iGoN$C{fus}|DV$RUpT`U7{W z3pxrhr%GkC68K=JBMnwzL9uy%a-S z+&jzMYsbtd{PHQ~H6vKh5^fnhD8w8{UDDwjVV;WzwjU#%mHi{ymyeQcCfIhiqBFJx z@x7Q)H#sSVQ37|`5CJ*OzrgI2$2EFi(Kgwa`1Qm_X{=Ozo^=6Bu(M9H&PlM zXsN6Wh*f*tMV{<0z50u}$AAr-B545u0$RT&Fsp>tw$T_ILUdYeyuSmu;`eGE$%1k* z_mlDGGb$elG!}aGWp-e&jt{@-=%V*_J_b7cd)WRv=NDN@A@{j&+83B5q57;aN}+ZZ zcX?V5!WH$7K?;*hNE@O?XSwe_B~ne@e~YH1!v=ic_;paoFt(H&cgZoQTE)iI0L$Fs z%>BEfHHpkmk&aflt9HIX7j}~iUD-+kHac)K`HU*)?JLnncXnrmM4Y`f`$}ol2MnxJ z`T16f99>hCf*r5YQ)z??XD)HAq@MV>a)KO*-*j==O-DX2#4T^FUM@@GGAM79h46Y_ zUiI3JQrTh9`9ckD`7Nzt0X-5QizAm}_h%n3oqhZlr(@aFV|xVge)fN9(?o8RO34$g zmn=fdxqOXMs_?`5us^pQO%KP?cnBG$ZS70SR!g*XVV2Z_9Y^dSc+Hba2x|@_`Y8^UI z&C*dCv0DZ{b!=d1<}u+Nm+gSy_8CH61<^x@1BNs24Y~d&jJL-3;ER)BE;-0it%vA* zL=S|gP-{w8I;k)NDBckpDduZIMqpokw};j38dzfjAJbwOc9wPWz_-=>;ih%Pt5p!| z9u6VoQVz9eV@%BF(w5b^Be6S0a!{jLFdM`t1i`ZPb9=Y4S&4bP2`6V&Q{H~_RSIad z^hbg@q+nyI4#q$c(>^LZ$if6_9a$+<#7s3v3?qL6`A4~+d_D$+e$ql3`u$Q@>xsS2A$N>v_t zPF2`ooQWkcW@a*0fqS8q4ZtFMbQ-7k2@!*#fEU@Dpo)1xdgpwA#}LDPwIihFoUIXu zv3x#lR!fO7B~rODbD%-zhK@ucFtF?$fj&l%!??8WB!U(c0#N1ETd@W3#~w{S5qR@%zmji`sQ@8BUk-b_bIAbBi+F z`Wq)r`v9rz8u>TGrR8#O!>|P5(GG-OvJ?9SnIg;jnHG9Uwu}x91XJ_s?%0FG3T?Ih z*-t;=aCK4e7tW0639F@g@l2d+pvQAOt`XPMfLqjPFfX1+I=hU2ZHJ z1gHWN0qi*~Hg&)e{(HV3Go@RYbYZQen_l1bm-yo@KY>On9la?=Q(@2T5WOF}lg_jI z`WMnYj;@yQ9kUS^JZ++u!kTAeeBZAxs z+(}AFVM#hNL62voHoxBd8yve`lO%Q#%c=K!7uB$9Ty=FuE-D8{Lk)zO8ZG? zZwf?`>tJ8_wDNkOg}1P62y4%?PeYg#MKVvEr!ysfd=C4&$5zP^kSM_yjgLcw0(S05 z3OSqSrt{9yxs;qp)ENE=e!!JHP!89N<}z+&6_nGM#;Q_*v8h-#d@u0vFn-L4%kNzs zs3`+~c3r<<19P&#(`1C9{5Gr555{6VyIYutwjV>_W!u67`GTcv$ila^0OY(!j=6G^ zQAOzB99D%1sZ6CXHe$Zv>k!8-lxpeL1dXN9;WEO=*9S|p;(cZjq9|y2Q0NOH& z8mQ+oLA~BHK#8{XfQ4!Z*Qy`D7{v{Mdn#M%ptVnJimgLms^=M`91`}kwf3p|YW(NK z1Zr$enD6ORYA)q-^?(eVk`U;(fmg zW{zcsLKI9ct2X>)#ZLC-jLd8xHv;Xa@Z(xiiu^=UNUD`Tsj00#$}JGrq$Gdok-MBV z_JWR8#bD!aDbN^DCtVEG*zj7}m^X+@&K9m}l$_jmcjk8}|6q0AEy~(%D%6QPXo;sh zF?A6i*-ypf`P(9?2}o#e6m+AWhF1PO{-ZB!$XVz+F!QR;bVKi5?U;r)KMLMTcQ4$G qo9Q;uaEAo|00007JUTo80i~M+w!t8?k!*`TFb#_W000000a;qcozX1- diff --git a/man/applyTagMappings.Rd b/man/applyTagMappings.Rd new file mode 100644 index 00000000..ae08a548 --- /dev/null +++ b/man/applyTagMappings.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{applyTagMappings} +\alias{applyTagMappings} +\title{Apply Tag Mappings to a Line} +\usage{ +applyTagMappings(line, record, pattern_rows, tag_mappings) +} +\arguments{ +\item{line}{A character string from the GEDCOM file.} + +\item{record}{A named list representing the individual's record.} + +\item{pattern_rows}{A list with GEDCOM tag counts.} + +\item{tag_mappings}{A list of lists. Each sublist should define: +- \code{tag}: the GEDCOM tag, +- \code{field}: the record field to update, +- \code{mode}: either "replace" or "append", +- \code{extractor}: (optional) a custom extraction function.} +} +\value{ +A list with the updated record (\code{record}) and a logical flag (\code{matched}). +} +\description{ +Iterates over a list of tag mappings and, if a tag matches the line, updates the record. +} diff --git a/man/collapseNames.Rd b/man/collapseNames.Rd index 01a38a6c..b3a0c514 100644 --- a/man/collapseNames.Rd +++ b/man/collapseNames.Rd @@ -11,6 +11,9 @@ collapseNames(verbose, df_temp) \item{df_temp}{A data frame containing the columns to be combined.} } +\value{ +A data frame with the combined columns. +} \description{ This function combines the `name_given` and `name_given_pieces` columns in a data frame. } diff --git a/man/collapseNames.legacy.Rd b/man/collapseNames.legacy.Rd new file mode 100644 index 00000000..dbf12d7c --- /dev/null +++ b/man/collapseNames.legacy.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{collapseNames.legacy} +\alias{collapseNames.legacy} +\title{collapse Names} +\usage{ +collapseNames.legacy(verbose, df_temp) +} +\arguments{ +\item{verbose}{A logical value indicating whether to print messages.} + +\item{df_temp}{A data frame containing the columns to be combined.} +} +\description{ +This function combines the `name_given` and `name_given_pieces` columns in a data frame. +} +\keyword{internal} diff --git a/man/combine_columns.Rd b/man/combine_columns.Rd index 43554b0b..9ab750ef 100644 --- a/man/combine_columns.Rd +++ b/man/combine_columns.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readGedcom.R +% Please edit documentation in R/readGedcom.R, R/readGedcomlegacy.R \name{combine_columns} \alias{combine_columns} \title{Combine Columns} \usage{ +combine_columns(col1, col2) + combine_columns(col1, col2) } \arguments{ @@ -12,9 +14,13 @@ combine_columns(col1, col2) \item{col2}{The second column to combine.} } \value{ +A list with the combined column and a flag indicating if the second column should be retained. + A list with the combined column and a flag indicating if the second column should be retained. } \description{ +This function combines two columns, handling conflicts and merging non-conflicting data. + This function combines two columns, handling conflicts and merging non-conflicting data. } \keyword{internal} diff --git a/man/countPatternRows.Rd b/man/countPatternRows.Rd index f9100912..3fe3c3af 100644 --- a/man/countPatternRows.Rd +++ b/man/countPatternRows.Rd @@ -2,17 +2,17 @@ % Please edit documentation in R/readGedcom.R \name{countPatternRows} \alias{countPatternRows} -\title{Check for Pattern Rows} +\title{Count GEDCOM Pattern Rows} \usage{ countPatternRows(file) } \arguments{ -\item{file}{A data frame containing the GEDCOM file.} +\item{file}{A data frame with a column \code{X1} containing GEDCOM lines.} } \value{ -A list with the number of rows containing each pattern. +A list with counts of specific GEDCOM tag occurrences. } \description{ -This function counts the number of rows containing specific patterns. +Counts the number of lines in a file (passed as a data frame with column "X1") +that match various GEDCOM patterns. } -\keyword{internal} diff --git a/man/countPatternRows.legacy.Rd b/man/countPatternRows.legacy.Rd new file mode 100644 index 00000000..c55ccf85 --- /dev/null +++ b/man/countPatternRows.legacy.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{countPatternRows.legacy} +\alias{countPatternRows.legacy} +\title{Check for Pattern Rows} +\usage{ +countPatternRows.legacy(file) +} +\arguments{ +\item{file}{A data frame containing the GEDCOM file.} +} +\value{ +A list with the number of rows containing each pattern. +} +\description{ +This function counts the number of rows containing specific patterns. +} +\keyword{internal} diff --git a/man/extract_info.legacy.Rd b/man/extract_info.legacy.Rd new file mode 100644 index 00000000..23ac5bd8 --- /dev/null +++ b/man/extract_info.legacy.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{extract_info.legacy} +\alias{extract_info.legacy} +\title{Extract Information from Line} +\usage{ +extract_info.legacy(line, type) +} +\arguments{ +\item{line}{A character string representing a line from a GEDCOM file.} + +\item{type}{A character string representing the type of information to extract.} +} +\value{ +A character string with the extracted information. +} +\description{ +This function extracts information from a line based on a specified type. +} +\keyword{internal} diff --git a/man/initializeRecord.Rd b/man/initializeRecord.Rd new file mode 100644 index 00000000..3c0c08ca --- /dev/null +++ b/man/initializeRecord.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{initializeRecord} +\alias{initializeRecord} +\title{Initialize an Empty Individual Record} +\usage{ +initializeRecord(all_var_names) +} +\arguments{ +\item{all_var_names}{A character vector of variable names.} +} +\value{ +A named list representing an empty individual record. +} +\description{ +Creates a named list with all GEDCOM fields set to NA. +} diff --git a/man/mapFAMC2parents.legacy.Rd b/man/mapFAMC2parents.legacy.Rd new file mode 100644 index 00000000..f3166e4f --- /dev/null +++ b/man/mapFAMC2parents.legacy.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{mapFAMC2parents.legacy} +\alias{mapFAMC2parents.legacy} +\title{Assign momID and dadID based on family mapping} +\usage{ +mapFAMC2parents.legacy(df_temp, family_to_parents) +} +\arguments{ +\item{df_temp}{A data frame containing individual information.} + +\item{family_to_parents}{A list mapping family IDs to parent IDs.} +} +\value{ +A data frame with added momID and dad_ID columns. +} +\description{ +This function assigns mother and father IDs to individuals in the data frame +based on the mapping of family IDs to parent IDs. +} +\keyword{internal} diff --git a/man/mapFAMS2parents.Rd b/man/mapFAMS2parents.Rd index 25d5a9f3..10cfb4e4 100644 --- a/man/mapFAMS2parents.Rd +++ b/man/mapFAMS2parents.Rd @@ -2,17 +2,17 @@ % Please edit documentation in R/readGedcom.R \name{mapFAMS2parents} \alias{mapFAMS2parents} -\title{Create a mapping of family IDs to parent IDs} +\title{Create a Mapping from Family IDs to Parent IDs} \usage{ mapFAMS2parents(df_temp) } \arguments{ -\item{df_temp}{A data frame containing information about individuals.} +\item{df_temp}{A data frame produced by \code{readGedcom()}.} } \value{ -A list mapping family IDs to parent IDs. +A list mapping family IDs to parent information. } \description{ -This function creates a mapping from family IDs to the IDs of the parents. +This function scans the data frame and creates a mapping of family IDs +to the corresponding parent IDs. } -\keyword{internal} diff --git a/man/mapFAMS2parents.legacy.Rd b/man/mapFAMS2parents.legacy.Rd new file mode 100644 index 00000000..798af515 --- /dev/null +++ b/man/mapFAMS2parents.legacy.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{mapFAMS2parents.legacy} +\alias{mapFAMS2parents.legacy} +\title{Create a mapping of family IDs to parent IDs} +\usage{ +mapFAMS2parents.legacy(df_temp) +} +\arguments{ +\item{df_temp}{A data frame containing information about individuals.} +} +\value{ +A list mapping family IDs to parent IDs. +} +\description{ +This function creates a mapping from family IDs to the IDs of the parents. +} +\keyword{internal} diff --git a/man/parseIndividualBlock.Rd b/man/parseIndividualBlock.Rd new file mode 100644 index 00000000..8f58554b --- /dev/null +++ b/man/parseIndividualBlock.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{parseIndividualBlock} +\alias{parseIndividualBlock} +\title{Parse a GEDCOM Individual Block} +\usage{ +parseIndividualBlock(block, pattern_rows, all_var_names, verbose = FALSE) +} +\arguments{ +\item{block}{A character vector containing the GEDCOM lines for one individual.} + +\item{pattern_rows}{A list with counts of lines matching specific GEDCOM tags.} + +\item{all_var_names}{A character vector of variable names.} + +\item{verbose}{Logical indicating whether to print progress messages.} +} +\value{ +A named list representing the parsed record for the individual, or NULL if no ID is found. +} +\description{ +Processes a block of GEDCOM lines corresponding to a single individual. +} +\keyword{internal} diff --git a/man/parseNameLine.Rd b/man/parseNameLine.Rd new file mode 100644 index 00000000..44490d60 --- /dev/null +++ b/man/parseNameLine.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{parseNameLine} +\alias{parseNameLine} +\title{Parse a Full Name Line} +\usage{ +parseNameLine(line, record) +} +\arguments{ +\item{line}{A character string containing the name line.} + +\item{record}{A named list representing the individual's record.} +} +\value{ +The updated record with parsed name information. +} +\description{ +Extracts full name information from a GEDCOM "NAME" line and updates the record accordingly. +} diff --git a/man/postProcessGedcom.Rd b/man/postProcessGedcom.Rd index 9d0c7b2c..62bbc50e 100644 --- a/man/postProcessGedcom.Rd +++ b/man/postProcessGedcom.Rd @@ -14,21 +14,22 @@ postProcessGedcom( ) } \arguments{ -\item{df_temp}{A data frame containing information about individuals.} +\item{df_temp}{A data frame produced by \code{readGedcom()}.} -\item{remove_empty_cols}{A logical value indicating whether to remove columns with all missing values.} +\item{remove_empty_cols}{Logical indicating whether to remove columns that are entirely missing.} -\item{combine_cols}{A logical value indicating whether to combine columns with duplicate values.} +\item{combine_cols}{Logical indicating whether to combine columns with duplicate values.} -\item{add_parents}{A logical value indicating whether to add parents to the data frame.} +\item{add_parents}{Logical indicating whether to add parent information.} -\item{skinny}{A logical value indicating whether to return a skinny data frame.} +\item{skinny}{Logical indicating whether to slim down the data frame.} -\item{verbose}{A logical value indicating whether to print messages.} +\item{verbose}{Logical indicating whether to print progress messages.} } \value{ -A data frame with processed information. +The post-processed data frame. } \description{ -Post-process GEDCOM Data Frame +This function optionally adds parent information, combines duplicate columns, +and removes empty columns from the GEDCOM data frame. } diff --git a/man/postProcessGedcom.legacy.Rd b/man/postProcessGedcom.legacy.Rd new file mode 100644 index 00000000..060b9367 --- /dev/null +++ b/man/postProcessGedcom.legacy.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{postProcessGedcom.legacy} +\alias{postProcessGedcom.legacy} +\title{Post-process GEDCOM Data Frame} +\usage{ +postProcessGedcom.legacy( + df_temp, + remove_empty_cols = TRUE, + combine_cols = TRUE, + add_parents = TRUE, + skinny = TRUE, + verbose = FALSE +) +} +\arguments{ +\item{df_temp}{A data frame containing information about individuals.} + +\item{remove_empty_cols}{A logical value indicating whether to remove columns with all missing values.} + +\item{combine_cols}{A logical value indicating whether to combine columns with duplicate values.} + +\item{add_parents}{A logical value indicating whether to add parents to the data frame.} + +\item{skinny}{A logical value indicating whether to return a skinny data frame.} + +\item{verbose}{A logical value indicating whether to print messages.} +} +\value{ +A data frame with processed information. +} +\description{ +Post-process GEDCOM Data Frame +} diff --git a/man/processEventLine.Rd b/man/processEventLine.Rd new file mode 100644 index 00000000..d4cff3d3 --- /dev/null +++ b/man/processEventLine.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{processEventLine} +\alias{processEventLine} +\title{Process Event Lines (Birth or Death)} +\usage{ +processEventLine(event, block, i, record, pattern_rows) +} +\arguments{ +\item{event}{A character string indicating the event type ("birth" or "death").} + +\item{block}{A character vector of GEDCOM lines.} + +\item{i}{The current line index where the event tag is found.} + +\item{record}{A named list representing the individual's record.} + +\item{pattern_rows}{A list with counts of GEDCOM tag occurrences.} +} +\value{ +The updated record with parsed event information.# +} +\description{ +Extracts event details (e.g., date, place, cause, latitude, longitude) from a block of GEDCOM lines. +For "birth": expect DATE on line i+1, PLAC on i+2, LATI on i+4, LONG on i+5. +For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +} diff --git a/man/processParents.Rd b/man/processParents.Rd index 9aa205a7..aec5deb3 100644 --- a/man/processParents.Rd +++ b/man/processParents.Rd @@ -2,17 +2,18 @@ % Please edit documentation in R/readGedcom.R \name{processParents} \alias{processParents} -\title{Process parents information} +\title{Process Parents Information from GEDCOM Data} \usage{ processParents(df_temp, datasource) } \arguments{ -\item{df_temp}{A data frame containing information about individuals.} +\item{df_temp}{A data frame produced by \code{readGedcom()}.} + +\item{datasource}{Character string indicating the data source ("gedcom" or "wiki").} } \value{ -A data frame with added momID and dadID columns. +The updated data frame with parent IDs added. } \description{ -This function processes the dataframe to add momID and dadID columns. +Adds parent IDs to the individuals based on family relationship data. } -\keyword{internal} diff --git a/man/processParents.legacy.Rd b/man/processParents.legacy.Rd new file mode 100644 index 00000000..774663aa --- /dev/null +++ b/man/processParents.legacy.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{processParents.legacy} +\alias{processParents.legacy} +\title{Process parents information} +\usage{ +processParents.legacy(df_temp, datasource) +} +\arguments{ +\item{df_temp}{A data frame containing information about individuals.} +} +\value{ +A data frame with added momID and dadID columns. +} +\description{ +This function processes the dataframe to add momID and dadID columns. +} +\keyword{internal} diff --git a/man/process_tag.legacy.Rd b/man/process_tag.legacy.Rd new file mode 100644 index 00000000..f31cbad6 --- /dev/null +++ b/man/process_tag.legacy.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{process_tag.legacy} +\alias{process_tag.legacy} +\title{Process a GEDCOM Tag} +\usage{ +process_tag.legacy( + tag, + field_name, + pattern_rows, + line, + vars, + extractor = NULL, + mode = "replace" +) +} +\arguments{ +\item{tag}{The GEDCOM tag (e.g., "SEX", "CAST", etc.).} + +\item{field_name}{The name of the variable to assign to in `vars`.} + +\item{pattern_rows}{Output from `countPatternRows()`.} + +\item{line}{The GEDCOM line to parse.} + +\item{vars}{The current list of variables to update.} +} +\value{ +A list with updated `vars` and a `matched` flag. +} +\description{ +Extracts and assigns a value to a specified field in `vars` if the pattern is present. +Returns both the updated variable list and a flag indicating whether the tag was matched. +} +\keyword{internal} diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index a54cd5aa..e345d587 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -99,5 +99,4 @@ A data frame containing information about individuals, with the following potent } \description{ This function reads a GEDCOM file and parses it into a structured data frame of individuals. -Inspired by https://raw.githubusercontent.com/jjfitz/readgedcom/master/R/read_gedcom.R } diff --git a/man/readGedcom.legacy.Rd b/man/readGedcom.legacy.Rd new file mode 100644 index 00000000..e4cf2957 --- /dev/null +++ b/man/readGedcom.legacy.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcomlegacy.R +\name{readGedcom.legacy} +\alias{readGedcom.legacy} +\title{Read a GEDCOM File} +\usage{ +readGedcom.legacy( + file_path, + verbose = FALSE, + add_parents = TRUE, + remove_empty_cols = TRUE, + combine_cols = TRUE, + skinny = FALSE, + update_rate = 1000, + post_process = TRUE, + ... +) +} +\arguments{ +\item{file_path}{The path to the GEDCOM file.} + +\item{verbose}{A logical value indicating whether to print messages.} + +\item{add_parents}{A logical value indicating whether to add parents to the data frame.} + +\item{remove_empty_cols}{A logical value indicating whether to remove columns with all missing values.} + +\item{combine_cols}{A logical value indicating whether to combine columns with duplicate values.} + +\item{skinny}{A logical value indicating whether to return a skinny data frame.} + +\item{update_rate}{numeric. The rate at which to print progress} + +\item{...}{Additional arguments to be passed to the function.} +} +\value{ +A data frame containing information about individuals, with the following potential columns: +- `id`: ID of the individual +- `momID`: ID of the individual's mother +- `dadID`: ID of the individual's father +- `sex`: Sex of the individual +- `name`: Full name of the individual +- `name_given`: First name of the individual +- `name_surn`: Last name of the individual +- `name_marriedsurn`: Married name of the individual +- `name_nick`: Nickname of the individual +- `name_npfx`: Name prefix +- `name_nsfx`: Name suffix +- `birth_date`: Birth date of the individual +- `birth_lat`: Latitude of the birthplace +- `birth_long`: Longitude of the birthplace +- `birth_place`: Birthplace of the individual +- `death_caus`: Cause of death +- `death_date`: Death date of the individual +- `death_lat`: Latitude of the place of death +- `death_long`: Longitude of the place of death +- `death_place`: Place of death of the individual +- `attribute_caste`: Caste of the individual +- `attribute_children`: Number of children of the individual +- `attribute_description`: Description of the individual +- `attribute_education`: Education of the individual +- `attribute_idnumber`: Identification number of the individual +- `attribute_marriages`: Number of marriages of the individual +- `attribute_nationality`: Nationality of the individual +- `attribute_occupation`: Occupation of the individual +- `attribute_property`: Property owned by the individual +- `attribute_religion`: Religion of the individual +- `attribute_residence`: Residence of the individual +- `attribute_ssn`: Social security number of the individual +- `attribute_title`: Title of the individual +- `FAMC`: ID(s) of the family where the individual is a child +- `FAMS`: ID(s) of the family where the individual is a spouse +} +\description{ +This function reads a GEDCOM file and parses it into a structured data frame of individuals. +Inspired by https://raw.githubusercontent.com/jjfitz/readgedcom/master/R/read_gedcom.R +} +\keyword{internal} diff --git a/man/splitIndividuals.Rd b/man/splitIndividuals.Rd new file mode 100644 index 00000000..dcb9a7e8 --- /dev/null +++ b/man/splitIndividuals.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readGedcom.R +\name{splitIndividuals} +\alias{splitIndividuals} +\title{Split GEDCOM Lines into Individual Blocks} +\usage{ +splitIndividuals(lines, verbose = FALSE) +} +\arguments{ +\item{lines}{A character vector of lines from the GEDCOM file.} + +\item{verbose}{Logical indicating whether to output progress messages.} +} +\value{ +A list of character vectors, each representing one individual. +} +\description{ +This function partitions the GEDCOM file (as a vector of lines) into a list of blocks, +where each block corresponds to a single individual starting with an "@ INDI" line. +} diff --git a/tests/testthat/test-readPedigrees_alpha.R b/tests/testthat/test-readWikiTree.R similarity index 100% rename from tests/testthat/test-readPedigrees_alpha.R rename to tests/testthat/test-readWikiTree.R From 8039214165967e06744a21285739ed0a1e74c2b7 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 16 Apr 2025 15:53:47 -0400 Subject: [PATCH 31/35] potential more direct methods --- R/convertPedigree.R | 257 ++++++++++++++++++++++++-- data-raw/benchmark.R | 180 +++++++++++++++++- tests/testthat/test-convertPedigree.R | 20 +- 3 files changed, 425 insertions(+), 32 deletions(-) diff --git a/R/convertPedigree.R b/R/convertPedigree.R index 3d5ac7ca..4b033c82 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -40,6 +40,7 @@ ped2com <- function(ped, component, save_rate_parlist = 100000 * save_rate, update_rate = 100, save_path = "checkpoint/", + method_approach=NULL, ...) { #------ # Checkpointing @@ -160,7 +161,8 @@ ped2com <- function(ped, component, lastComputed = lastComputed, nr = nr, parList = parList, - lens = lens + lens = lens, + method_approach = method_approach ) # Construct sparse matrix @@ -406,7 +408,8 @@ ped2mit <- ped2mt <- function(ped, max.gen = 25, resume = resume, save_rate_gen = save_rate_gen, save_rate_parlist = save_rate_parlist, - save_path = save_path + save_path = save_path, + ... ) } @@ -442,7 +445,8 @@ ped2cn <- function(ped, max.gen = 25, sparse = TRUE, verbose = FALSE, resume = resume, save_rate_gen = save_rate_gen, save_rate_parlist = save_rate_parlist, - save_path = save_path + save_path = save_path, + ... ) } #' Take a pedigree and turn it into an extended environmental relatedness matrix @@ -540,8 +544,7 @@ ped2ce <- function(ped, .adjIndexed <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, - ...) { + parList, lens, save_rate_parlist) { # Loop through each individual in the pedigree # Build the adjacency matrix for parent-child relationships # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. @@ -599,7 +602,7 @@ ped2ce <- function(ped, .adjDirect <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, + parList, lens, save_rate_parlist,method_approach, ...) { # Loop through each individual in the pedigree # Build the adjacency matrix for parent-child relationships @@ -615,17 +618,19 @@ ped2ce <- function(ped, iss <- c(mIDs$rID, dIDs$rID) jss <- c(mIDs$cID, dIDs$cID) } else if (component %in% c("common nuclear")) { - message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") - # change to warning and call indexed version - list_of_adjacency <- .adjIndexed(ped = ped, component = component, - saveable = saveable, resume = resume, - save_path = save_path, verbose = verbose, - lastComputed = lastComputed, nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, parList = parList, - lens = lens, save_rate_parlist = save_rate_parlist, - ... - ) + # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") + + list_of_adjacency <- cnmethods(ped=ped,method_approach=method_approach, + component = component, + saveable = saveable, resume = resume, + save_path = save_path, verbose = verbose, + lastComputed = lastComputed, nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, save_rate_parlist = save_rate_parlist, + ...) + return(list_of_adjacency) } else if (component %in% c("mitochondrial")) { mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) @@ -653,9 +658,9 @@ ped2ce <- function(ped, compute_parent_adjacency <- function(ped, component, adjacency_method = "direct", saveable, resume, - save_path, verbose, - lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, + save_path, verbose=FALSE, + lastComputed=0, nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist,method_approach=NULL, ...) { if (adjacency_method == "loop") { if (lastComputed < nr) { # Original version @@ -711,6 +716,7 @@ compute_parent_adjacency <- function(ped, component, parList = parList, lens = lens, save_rate_parlist = save_rate_parlist, + method_approach = method_approach, ... ) } @@ -745,3 +751,214 @@ isChild <- function(isChild_method, ped) { }) } } + + +cnmethods <- function(ped,component = "common nuclear", + method_approach=NULL, + parList=NULL, + lastComputed=0, + nr=NULL, + lens=NULL, + saveable=FALSE, + resume=FALSE, + save_path=NULL, + verbose=FALSE, + checkpoint_files=NULL, + ...){# 1) Pairwise compare mother IDs + if(method_approach == 1){ + + # gets slow when data are bigger. much slower than indexed + momMatch <- outer(ped$momID, ped$momID, FUN = "==") + momMatch[is.na(momMatch)] <- FALSE + + # 2) Pairwise compare father IDs + dadMatch <- outer(ped$dadID, ped$dadID, FUN = "==") + dadMatch[is.na(dadMatch)] <- FALSE + + # 3) Sibling adjacency if both mom & dad match + adj <- momMatch & dadMatch + + # 4) Extract indices where adj[i,j] is TRUE + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] +# + list_of_adjacency <- list( + iss = w[, 1], + jss = w[, 2] + ) + } else if(method_approach == 2){ + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single string label for each known (momID, dadID) pair + pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + # This is "creating a new ID" for each unique parent pair + pairCode <- match(pairLabel, unique(pairLabel)) + + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) # length(childVec) = sum(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency i->j + iss_list <- list() + jss_list <- list() + counter <- 1 + + for (g in groupList) { + if (length(g) > 1) { + combos <- expand.grid(g, g, KEEP.OUT.ATTRS = FALSE) + combos <- combos[combos[,1] != combos[,2], , drop = FALSE] + iss_list[[counter]] <- combos[,1] + jss_list[[counter]] <- combos[,2] + counter <- counter + 1 + } + } + # iss <- unlist(iss_list, use.names = FALSE) + # jss <- unlist(jss_list, use.names = FALSE) + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else if(method_approach == 3){ + nr <- nrow(ped) +# terrible + # Define a scalar-checking function: + f_check <- function(i, j) { + # i, j are each single integers + # Return one boolean: do they share both parents? + !is.na(ped$momID[i]) && !is.na(ped$dadID[i]) && + !is.na(ped$momID[j]) && !is.na(ped$dadID[j]) && + (ped$momID[i] == ped$momID[j]) && + (ped$dadID[i] == ped$dadID[j]) + } + + # Vectorize it so outer() will produce an nr x nr matrix + vf_check <- Vectorize(f_check) + + # Now outer() calls vf_check(...) in a way that yields scalar results + adj <- outer(seq_len(nr), seq_len(nr), FUN = vf_check) + + # Extract which cells of adj are TRUE + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] + + list_of_adjacency <- list( + iss = iss <- w[, 1], + jss = jss <- w[, 2] + ) +}else if(method_approach == 4){ + + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single string label for each known (momID, dadID) pair + pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + pairCode <- match(pairLabel, unique(pairLabel)) + + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + + } else if(method_approach == 5){ + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single string label for each known (momID, dadID) pair + #pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + base <- max(ped$ID, na.rm = TRUE) + 1L + pairCode <- ped$momID[mask] + base * ped$dadID[mask] + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + # pairCode <- match(pairLabel, unique(pairLabel)) + + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + + }else{ + + list_of_adjacency <- .adjIndexed(ped = ped, component = component, + saveable = saveable, resume = resume, + save_path = save_path, verbose = verbose, + lastComputed = lastComputed, nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, parList = parList, + lens = lens, save_rate_parlist = save_rate_parlist + ) + + } + + + + return(list_of_adjacency) + +} diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R index 536190b1..96d25d67 100644 --- a/data-raw/benchmark.R +++ b/data-raw/benchmark.R @@ -27,6 +27,7 @@ ped2 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% ) set.seed(1151) kpc <- 8 +Ngen <- 10 ped3 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% mutate( fam = "fam 3", @@ -35,25 +36,62 @@ ped3 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% dadID = dadID + 20000, spID = spID + 20000 ) +ped3 <- ped3 %>% + mutate( + fam = "fam 4", + ID = ID + 10000, + momID = momID + 10000, + dadID = dadID + 10000, + spID = spID + 10000 + ) %>% rbind(ped3) + +set.seed(1151) +kpc <- 2 +Ngen <- 10 +ped4 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% + mutate( + fam = "fam 5", + ID = ID + 40000, + momID = momID + 40000, + dadID = dadID + 40000, + spID = spID + 40000 + ) + + ped <- rbind(ped, ped2) ped <- rbind(ped, ped3) -if(FALSE){ +ped <- rbind(ped, ped4) + +if(TRUE){ # Define parameters -component <- "additive" # Change this to test different components +component <- "common nuclear"#"additive" # Change this to test different components saveable <- FALSE # Disable saving to avoid disk I/O slowing down benchmarking resume <- FALSE # Disable resume to ensure full fresh runs save_path <- "checkpoint/" verbose <- FALSE # Turn off verbose for cleaner output update_rate <- 100 save_rate_parlist <- 1000 - +#method_approach <- 1 # Run benchmarking for "loop" and "indexed" methods in ped2com() benchmark_results <- microbenchmark( - loop = { +# loop_big = { +# ped2com( +# ped = ped, +# component = component, +# adjacency_method = "loop", # Test "loop" method +# saveable = saveable, +# resume = resume, +# save_path = save_path, +# verbose = verbose, +# update_rate = update_rate, +# save_rate_parlist = save_rate_parlist +# ) +# }, + indexed_big = { ped2com( ped = ped, component = component, - adjacency_method = "loop", # Test "loop" method + adjacency_method = "indexed", # Test "indexed" method saveable = saveable, resume = resume, save_path = save_path, @@ -62,10 +100,66 @@ benchmark_results <- microbenchmark( save_rate_parlist = save_rate_parlist ) }, - indexed = { + direct4_big = { ped2com( ped = ped, component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 4, + save_rate_parlist = save_rate_parlist + ) + }, + direct2_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 2, + save_rate_parlist = save_rate_parlist + ) + }, + direct5_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 5, + save_rate_parlist = save_rate_parlist + ) + }, +# loop = { +# ped2com( +# ped = ped2, +# component = component, +# adjacency_method = "loop", # Test "loop" method +# saveable = saveable, +# resume = resume, +# save_path = save_path, +# verbose = verbose, +# update_rate = update_rate, +# save_rate_parlist = save_rate_parlist +# +# ) +# }, + indexed = { + ped2com( + ped = ped2, + component = component, adjacency_method = "indexed", # Test "indexed" method saveable = saveable, resume = resume, @@ -75,14 +169,81 @@ benchmark_results <- microbenchmark( save_rate_parlist = save_rate_parlist ) }, + direct4 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 4, + save_rate_parlist = save_rate_parlist + ) + }, + direct2 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 2, + save_rate_parlist = save_rate_parlist + ) + }, + direct5 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 5, + save_rate_parlist = save_rate_parlist + ) + }, times = 100 # Run each method 100 times ) + summary(benchmark_results) -lm(benchmark_results$time ~ benchmark_results$expr) %>% - summary() +df_plot <- benchmark_results %>% mutate(size = case_when(expr %in% c("loop", "indexed", "direct4", "direct2", "direct5") ~ "small", + expr %in% c("loop_big", "indexed_big", "direct4_big", "direct2_big", "direct5_big") ~ "big"), + method = case_when(expr %in% c("loop", "loop_big") ~ "loop", + expr %in% c("indexed", "indexed_big") ~ "indexed", + expr %in% c("direct4", "direct4_big") ~ "direct4", + expr %in% c("direct2", "direct2_big") ~ "direct2", + expr %in% c("direct5", "direct5_big") ~ "direct5"))# %>% + +# set indexed as reference level +df_plot$method <- factor(df_plot$method, levels = c("indexed", "loop","direct2", "direct4", "direct5")) +df_plot$size <- factor(df_plot$size, levels = c("small", "big")) + + +lm(time ~ method*size,data=df_plot) %>% + summary() %>% print() + + +p<-ggplot(df_plot, aes(x = method, y = time)) + + geom_boxplot(aes(fill = size), alpha = 0.5) + + labs(title = "Benchmarking Results", + x = "Method", + y = "Time (seconds)") + + theme_minimal() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Print benchmark results + +p print(benchmark_results) # Optional: Save results to CSV for later analysis @@ -92,6 +253,8 @@ write.csv(summary(benchmark_results), ) # Print benchmark } + +if(FALSE){ verbose=FALSE ad_ped_matrix <- ped2com(ped, component = "additive", adjacency_method = "direct", sparse = TRUE) mit_ped_matrix <- ped2com(ped, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) @@ -138,3 +301,4 @@ write.csv(summary(benchmark_results), "benchmark_results.csv", row.names = FALSE ) +} diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R index ec5af8b1..9e18c892 100644 --- a/tests/testthat/test-convertPedigree.R +++ b/tests/testthat/test-convertPedigree.R @@ -297,14 +297,26 @@ test_that("adjacency_method 'indexed', 'loop', and direct produce the same resu test_that("adjacency_method 'indexed', 'loop', and direct produce the same results for common nuclear matrix", { data(hazard) tolerance <- 1e-10 - + method_approach_1 <- 1 + method_approach_2 <- 4 + method_approach_3 <- 5 # common nuclear ped_common_indexed <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed") ped_common_loop <- ped2com(hazard, component = "common nuclear", adjacency_method = "loop") - ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct") + ped_common_direct1 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_1) + ped_common_direct2 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_2) + ped_common_direct3 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_3) + expect_equal(ped_common_indexed, ped_common_loop, tolerance = tolerance) - expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance) - expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_direct1, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_direct1, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_direct2, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_direct2, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_direct3, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_direct3, tolerance = tolerance) + expect_equal(ped_common_direct1, ped_common_direct2, tolerance = tolerance) + expect_equal(ped_common_direct1, ped_common_direct3, tolerance = tolerance) + expect_equal(ped_common_direct2, ped_common_direct3, tolerance = tolerance) }) From 3293ebdf49b4b995e3c1669a65e94b94ea706627 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 16 Apr 2025 16:09:41 -0400 Subject: [PATCH 32/35] BENCHMARCKING --- R/convertPedigree.R | 5 +---- data-raw/benchmark.R | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/R/convertPedigree.R b/R/convertPedigree.R index 4b033c82..f099df8a 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -902,15 +902,12 @@ cnmethods <- function(ped,component = "common nuclear", # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) - # 2) Create a single string label for each known (momID, dadID) pair + # 2) Create a single hash label for each known (momID, dadID) pair #pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) base <- max(ped$ID, na.rm = TRUE) + 1L pairCode <- ped$momID[mask] + base * ped$dadID[mask] # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - # pairCode <- match(pairLabel, unique(pairLabel)) - - # childVec are the row indices in 'ped' that have known parents childVec <- which(mask) # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R index 96d25d67..4a615af2 100644 --- a/data-raw/benchmark.R +++ b/data-raw/benchmark.R @@ -20,29 +20,29 @@ marR <- .8 ped2 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% mutate( fam = "fam 2", - ID = ID + 10000, - momID = momID + 10000, - dadID = dadID + 10000, - spID = spID + 10000 + ID = ID + max(ped2$ID, na.rm = TRUE), + momID = momID + max(ped$ID, na.rm = TRUE), + dadID = dadID + max(ped$ID, na.rm = TRUE), + spID = spID + max(ped$ID, na.rm = TRUE) ) set.seed(1151) kpc <- 8 -Ngen <- 10 +Ngen <- 6 ped3 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% mutate( fam = "fam 3", - ID = ID + 20000, - momID = momID + 20000, - dadID = dadID + 20000, - spID = spID + 20000 + ID = ID + max(ped2$ID, na.rm = TRUE), + momID = momID + max(ped2$ID, na.rm = TRUE), + dadID = dadID + max(ped2$ID, na.rm = TRUE), + spID = spID + max(ped2$ID, na.rm = TRUE) ) ped3 <- ped3 %>% mutate( fam = "fam 4", - ID = ID + 10000, - momID = momID + 10000, - dadID = dadID + 10000, - spID = spID + 10000 + ID = ID + max(ped3$ID, na.rm = TRUE), + momID = momID + max(ped3$ID, na.rm = TRUE), + dadID = dadID + max(ped3$ID, na.rm = TRUE), + spID = spID + max(ped3$ID, na.rm = TRUE) ) %>% rbind(ped3) set.seed(1151) @@ -51,10 +51,10 @@ Ngen <- 10 ped4 <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) %>% mutate( fam = "fam 5", - ID = ID + 40000, - momID = momID + 40000, - dadID = dadID + 40000, - spID = spID + 40000 + ID = ID + max(ped3$ID, na.rm = TRUE), + momID = momID + max(ped3$ID, na.rm = TRUE), + dadID = dadID + max(ped3$ID, na.rm = TRUE), + spID = spID + max(ped3$ID, na.rm = TRUE) ) @@ -211,7 +211,7 @@ benchmark_results <- microbenchmark( save_rate_parlist = save_rate_parlist ) }, - times = 100 # Run each method 100 times + times = 10 # Run each method 100 times ) From 2ae433616e1db4752fb2f9b2c4a53bfdbabb1a86 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 16 Apr 2025 20:00:26 -0400 Subject: [PATCH 33/35] rolling out a direct(ish) method --- NEWS.md | 1 + R/convertPedigree.R | 120 +++++++++++++++++++------- R/helpPedigree.R | 2 + R/readGedcom.R | 3 +- man/compute_parent_adjacency.Rd | 9 +- man/determineSex.Rd | 4 + man/ped2add.Rd | 2 +- man/ped2cn.Rd | 4 +- man/ped2com.Rd | 5 +- man/ped2mit.Rd | 2 +- man/readGedcom.Rd | 2 + tests/testthat/test-convertPedigree.R | 30 +++---- 12 files changed, 127 insertions(+), 57 deletions(-) diff --git a/NEWS.md b/NEWS.md index fd941969..c0daa9a2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * allow verbose argument to be passed to standardizeColnames * list SimPed and related_coef as aliases for functions * harmonizing function names like calcFamilySize from famSizeCal +* implemented adjBeta function to evaluation alternative build method # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/convertPedigree.R b/R/convertPedigree.R index f099df8a..ae76cab0 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -17,8 +17,9 @@ #' @param flatten.diag logical. If TRUE, overwrite the diagonal of the final relatedness matrix with ones #' @param standardize.colnames logical. If TRUE, standardize the column names of the pedigree dataset #' @param transpose_method character. The method to use for computing the transpose. Options are "tcrossprod", "crossprod", or "star" -#' @param adjacency_method character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed" +#' @param adjacency_method character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta #' @param isChild_method character. The method to use for computing the isChild matrix. Options are "classic" or "partialparent" +#' @param adjBeta_method numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build #' @param ... additional arguments to be passed to \code{\link{ped2com}} #' @details The algorithms and methodologies used in this function are further discussed and exemplified in the vignette titled "examplePedigreeFunctions". For more advanced scenarios and detailed explanations, consult this vignette. #' @export @@ -40,7 +41,7 @@ ped2com <- function(ped, component, save_rate_parlist = 100000 * save_rate, update_rate = 100, save_path = "checkpoint/", - method_approach=NULL, + adjBeta_method=NULL, ...) { #------ # Checkpointing @@ -90,8 +91,8 @@ ped2com <- function(ped, component, if (!transpose_method %in% c("tcrossprod", "crossprod", "star", "tcross.alt.crossprod", "tcross.alt.star")) { stop("Invalid method specified. Choose from 'tcrossprod', 'crossprod', or 'star' or 'tcross.alt.crossprod' or 'tcross.alt.star'.") } - if (!adjacency_method %in% c("indexed", "loop", "direct")) { - stop("Invalid method specified. Choose from 'indexed', 'loop', or 'direct'.") + if (!adjacency_method %in% c("indexed", "loop", "direct", "beta")) { + stop("Invalid method specified. Choose from 'indexed', 'loop', 'direct', or 'beta'.") } # standardize colnames @@ -162,7 +163,7 @@ ped2com <- function(ped, component, nr = nr, parList = parList, lens = lens, - method_approach = method_approach + adjBeta_method = adjBeta_method ) # Construct sparse matrix @@ -425,7 +426,7 @@ ped2cn <- function(ped, max.gen = 25, sparse = TRUE, verbose = FALSE, saveable = FALSE, resume = FALSE, save_rate = 5, - adjacency_method = "indexed", + adjacency_method = "direct", save_rate_gen = save_rate, save_rate_parlist = 1000 * save_rate, save_path = "checkpoint/", @@ -602,7 +603,7 @@ ped2ce <- function(ped, .adjDirect <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist,method_approach, + parList, lens, save_rate_parlist,adjBeta_method, ...) { # Loop through each individual in the pedigree # Build the adjacency matrix for parent-child relationships @@ -620,18 +621,58 @@ ped2ce <- function(ped, } else if (component %in% c("common nuclear")) { # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") - list_of_adjacency <- cnmethods(ped=ped,method_approach=method_approach, - component = component, - saveable = saveable, resume = resume, - save_path = save_path, verbose = verbose, - lastComputed = lastComputed, nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, save_rate_parlist = save_rate_parlist, - ...) - - return(list_of_adjacency) + # 1) Create a logical mask for only known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single hash label for each known (momID, dadID) pair + base <- max(ped$ID, na.rm = TRUE) + 1L + pairCode <- ped$momID[mask] + base * ped$dadID[mask] + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # rep() calls faster than expand.grid + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + + iss <- unlist(iss_list, use.names = FALSE) + jss <- unlist(jss_list, use.names = FALSE) + + # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, +# component = component, + # saveable = saveable, resume = resume, + # save_path = save_path, verbose = verbose, + # lastComputed = lastComputed, nr = nr, + # checkpoint_files = checkpoint_files, + # update_rate = update_rate, + # parList = parList, + # lens = lens, save_rate_parlist = save_rate_parlist, + # ...) + + # return(list_of_adjacency) } else if (component %in% c("mitochondrial")) { mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) iss <- c(mIDs$rID) @@ -660,7 +701,7 @@ compute_parent_adjacency <- function(ped, component, saveable, resume, save_path, verbose=FALSE, lastComputed=0, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist,method_approach=NULL, + parList, lens, save_rate_parlist,adjBeta_method=NULL, ...) { if (adjacency_method == "loop") { if (lastComputed < nr) { # Original version @@ -716,12 +757,27 @@ compute_parent_adjacency <- function(ped, component, parList = parList, lens = lens, save_rate_parlist = save_rate_parlist, - method_approach = method_approach, ... ) } + } else if (adjacency_method == "beta") { + list_of_adjacency <- .adjBeta(ped = ped, + adjBeta_method = adjBeta_method, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ...) } else { - stop("Invalid method specified. Choose from 'loop', 'direct', or 'indexed'.") + stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") } if (saveable) { saveRDS(parList, file = checkpoint_files$parList) @@ -753,8 +809,8 @@ isChild <- function(isChild_method, ped) { } -cnmethods <- function(ped,component = "common nuclear", - method_approach=NULL, +.adjBeta <- function(ped,component, + adjBeta_method=5, parList=NULL, lastComputed=0, nr=NULL, @@ -763,9 +819,11 @@ cnmethods <- function(ped,component = "common nuclear", resume=FALSE, save_path=NULL, verbose=FALSE, + save_rate_parlist=NULL, + update_rate=NULL, checkpoint_files=NULL, ...){# 1) Pairwise compare mother IDs - if(method_approach == 1){ + if(adjBeta_method == 1){ # gets slow when data are bigger. much slower than indexed momMatch <- outer(ped$momID, ped$momID, FUN = "==") @@ -787,7 +845,7 @@ cnmethods <- function(ped,component = "common nuclear", iss = w[, 1], jss = w[, 2] ) - } else if(method_approach == 2){ + } else if(adjBeta_method == 2){ # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) @@ -825,7 +883,7 @@ cnmethods <- function(ped,component = "common nuclear", iss = unlist(iss_list, use.names = FALSE), jss = unlist(jss_list, use.names = FALSE) ) - } else if(method_approach == 3){ + } else if(adjBeta_method == 3){ nr <- nrow(ped) # terrible # Define a scalar-checking function: @@ -853,7 +911,7 @@ cnmethods <- function(ped,component = "common nuclear", iss = iss <- w[, 1], jss = jss <- w[, 2] ) -}else if(method_approach == 4){ +}else if(adjBeta_method == 4){ # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) @@ -898,7 +956,7 @@ cnmethods <- function(ped,component = "common nuclear", jss = unlist(jss_list, use.names = FALSE) ) - } else if(method_approach == 5){ + } else if(adjBeta_method == 5){ # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) @@ -953,9 +1011,5 @@ cnmethods <- function(ped,component = "common nuclear", ) } - - - return(list_of_adjacency) - } diff --git a/R/helpPedigree.R b/R/helpPedigree.R index f9e7c855..e46d49cf 100644 --- a/R/helpPedigree.R +++ b/R/helpPedigree.R @@ -39,6 +39,8 @@ createGenDataFrame <- function(sizeGens, genIndex, idGen) { #' #' @param idGen Vector of IDs for the generation. #' @param sexR Numeric value indicating the sex ratio (proportion of males). +#' @param recode_male The value to use for males. Default is "M" +#' @param recode_female The value to use for females. Default is "F" #' @return Vector of sexes ("M" for male, "F" for female) for the offspring. #' @importFrom stats runif determineSex <- function(idGen, sexR, code_male = "M", code_female = "F") { diff --git a/R/readGedcom.R b/R/readGedcom.R index 109377e1..29462ad2 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -9,6 +9,7 @@ #' @param verbose A logical value indicating whether to print messages. #' @param skinny A logical value indicating whether to return a skinny data frame. #' @param update_rate numeric. The rate at which to print progress +#' @param post_process A logical value indicating whether to post-process the data frame. #' @param ... Additional arguments to be passed to the function. #' @return A data frame containing information about individuals, with the following potential columns: #' - `id`: ID of the individual @@ -154,7 +155,7 @@ splitIndividuals <- function(lines, verbose = FALSE) { #' @param all_var_names A character vector of variable names. #' @return A named list representing an empty individual record. initializeRecord <- function(all_var_names) { - setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) + stats::setNames(as.list(rep(NA_character_, length(all_var_names))), all_var_names) } #' Parse a GEDCOM Individual Block diff --git a/man/compute_parent_adjacency.Rd b/man/compute_parent_adjacency.Rd index 21974673..f6364808 100644 --- a/man/compute_parent_adjacency.Rd +++ b/man/compute_parent_adjacency.Rd @@ -11,14 +11,15 @@ compute_parent_adjacency( saveable, resume, save_path, - verbose, - lastComputed, + verbose = FALSE, + lastComputed = 0, nr, checkpoint_files, update_rate, parList, lens, save_rate_parlist, + adjBeta_method = NULL, ... ) } @@ -27,7 +28,7 @@ compute_parent_adjacency( \item{component}{character. Which component of the pedigree to return. See Details.} -\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed"} +\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta} \item{saveable}{logical. If TRUE, save the intermediate results to disk} @@ -51,6 +52,8 @@ compute_parent_adjacency( \item{save_rate_parlist}{numeric. The rate at which to save the intermediate results by parent list. If NULL, defaults to save_rate*1000} +\item{adjBeta_method}{numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/determineSex.Rd b/man/determineSex.Rd index c1c096af..4ea9498a 100644 --- a/man/determineSex.Rd +++ b/man/determineSex.Rd @@ -10,6 +10,10 @@ determineSex(idGen, sexR, code_male = "M", code_female = "F") \item{idGen}{Vector of IDs for the generation.} \item{sexR}{Numeric value indicating the sex ratio (proportion of males).} + +\item{recode_male}{The value to use for males. Default is "M"} + +\item{recode_female}{The value to use for females. Default is "F"} } \value{ Vector of sexes ("M" for male, "F" for female) for the offspring. diff --git a/man/ped2add.Rd b/man/ped2add.Rd index 79803b21..c2179e99 100644 --- a/man/ped2add.Rd +++ b/man/ped2add.Rd @@ -42,7 +42,7 @@ ped2add( \item{transpose_method}{character. The method to use for computing the transpose. Options are "tcrossprod", "crossprod", or "star"} -\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed"} +\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta} \item{saveable}{logical. If TRUE, save the intermediate results to disk} diff --git a/man/ped2cn.Rd b/man/ped2cn.Rd index 82d25902..c738d13b 100644 --- a/man/ped2cn.Rd +++ b/man/ped2cn.Rd @@ -16,7 +16,7 @@ ped2cn( saveable = FALSE, resume = FALSE, save_rate = 5, - adjacency_method = "indexed", + adjacency_method = "direct", save_rate_gen = save_rate, save_rate_parlist = 1000 * save_rate, save_path = "checkpoint/", @@ -48,7 +48,7 @@ ped2cn( \item{save_rate}{numeric. The rate at which to save the intermediate results} -\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed"} +\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta} \item{save_rate_gen}{numeric. The rate at which to save the intermediate results by generation. If NULL, defaults to save_rate} diff --git a/man/ped2com.Rd b/man/ped2com.Rd index c47d5982..58c0fc47 100644 --- a/man/ped2com.Rd +++ b/man/ped2com.Rd @@ -23,6 +23,7 @@ ped2com( save_rate_parlist = 1e+05 * save_rate, update_rate = 100, save_path = "checkpoint/", + adjBeta_method = NULL, ... ) } @@ -47,7 +48,7 @@ ped2com( \item{transpose_method}{character. The method to use for computing the transpose. Options are "tcrossprod", "crossprod", or "star"} -\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed"} +\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta} \item{isChild_method}{character. The method to use for computing the isChild matrix. Options are "classic" or "partialparent"} @@ -65,6 +66,8 @@ ped2com( \item{save_path}{character. The path to save the checkpoint files} +\item{adjBeta_method}{numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/ped2mit.Rd b/man/ped2mit.Rd index d11480d1..c19e9ba7 100644 --- a/man/ped2mit.Rd +++ b/man/ped2mit.Rd @@ -43,7 +43,7 @@ ped2mit( \item{transpose_method}{character. The method to use for computing the transpose. Options are "tcrossprod", "crossprod", or "star"} -\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop" or "indexed"} +\item{adjacency_method}{character. The method to use for computing the adjacency matrix. Options are "loop", "indexed", direct or beta} \item{saveable}{logical. If TRUE, save the intermediate results to disk} diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index e345d587..d9701427 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -57,6 +57,8 @@ readgedcom( \item{update_rate}{numeric. The rate at which to print progress} +\item{post_process}{A logical value indicating whether to post-process the data frame.} + \item{...}{Additional arguments to be passed to the function.} } \value{ diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R index 9e18c892..a4b747e1 100644 --- a/tests/testthat/test-convertPedigree.R +++ b/tests/testthat/test-convertPedigree.R @@ -297,26 +297,26 @@ test_that("adjacency_method 'indexed', 'loop', and direct produce the same resu test_that("adjacency_method 'indexed', 'loop', and direct produce the same results for common nuclear matrix", { data(hazard) tolerance <- 1e-10 - method_approach_1 <- 1 - method_approach_2 <- 4 - method_approach_3 <- 5 + adjBeta_method_1 <- 1 + adjBeta_method_2 <- 4 + adjBeta_method_3 <- 5 # common nuclear ped_common_indexed <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed") ped_common_loop <- ped2com(hazard, component = "common nuclear", adjacency_method = "loop") - ped_common_direct1 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_1) - ped_common_direct2 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_2) - ped_common_direct3 <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct",method_approach = method_approach_3) + ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct") + ped_common_adjBeta_1 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta",adjBeta_method = adjBeta_method_2) + ped_common_adjBeta_2 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta",adjBeta_method = adjBeta_method_3) expect_equal(ped_common_indexed, ped_common_loop, tolerance = tolerance) - expect_equal(ped_common_loop, ped_common_direct1, tolerance = tolerance) - expect_equal(ped_common_indexed, ped_common_direct1, tolerance = tolerance) - expect_equal(ped_common_loop, ped_common_direct2, tolerance = tolerance) - expect_equal(ped_common_indexed, ped_common_direct2, tolerance = tolerance) - expect_equal(ped_common_loop, ped_common_direct3, tolerance = tolerance) - expect_equal(ped_common_indexed, ped_common_direct3, tolerance = tolerance) - expect_equal(ped_common_direct1, ped_common_direct2, tolerance = tolerance) - expect_equal(ped_common_direct1, ped_common_direct3, tolerance = tolerance) - expect_equal(ped_common_direct2, ped_common_direct3, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_direct, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_adjBeta_1, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_adjBeta_1, tolerance = tolerance) + expect_equal(ped_common_loop, ped_common_adjBeta_2, tolerance = tolerance) + expect_equal(ped_common_indexed, ped_common_adjBeta_2, tolerance = tolerance) + expect_equal(ped_common_direct, ped_common_adjBeta_1, tolerance = tolerance) + expect_equal(ped_common_direct, ped_common_adjBeta_2, tolerance = tolerance) + expect_equal(ped_common_adjBeta_1, ped_common_adjBeta_2, tolerance = tolerance) }) From 76ebddc133ff4041b228194e8e8e8a95a5b82ed6 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 16 Apr 2025 20:06:45 -0400 Subject: [PATCH 34/35] styler --- R/checkParents.R | 12 +- R/convertPedigree.R | 293 ++++++++-------- R/makeLinks.R | 407 ++++++++++++----------- R/makeLinkslegacy.R | 1 - R/readGedcom.R | 105 +++--- R/readGedcomlegacy.R | 70 ++-- R/simulatePedigree.R | 12 +- data-raw/benchged.R | 4 +- data-raw/benchmark.R | 462 +++++++++++++------------- tests/testthat/test-convertPedigree.R | 4 +- tests/testthat/test-makeLinks.R | 16 +- tests/testthat/test-plotPedigree.R | 6 +- tests/testthat/test-readPedigrees.R | 1 - tests/testthat/test-readWikiTree.R | 1 - vignettes/ASOIAF.Rmd | 2 - vignettes/partial.Rmd | 104 +++--- vignettes/partial.html | 104 +++--- 17 files changed, 829 insertions(+), 775 deletions(-) diff --git a/R/checkParents.R b/R/checkParents.R index 7d142af1..43c8a924 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -145,7 +145,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, # Are any parents in both momID and dadID? momdad <- intersect(ped$dadID, ped$momID) - if (length(momdad) > 0&& !is.na(momdad)) { + if (length(momdad) > 0 && !is.na(momdad)) { validation_results$parents_in_both <- momdad if (verbose) { cat(paste( @@ -185,7 +185,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, - if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)){ + if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) { corrected_moms <- ped$ID[mom_indices[!is.na(mom_indices)]] ped$sex[mom_indices[!is.na(mom_indices)]] <- validation_results$female_var changes$corrected_mom_sex <- corrected_moms @@ -200,9 +200,8 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, if (verbose && length(corrected_moms) > 0) { cat("Corrected sex of moms for:", paste(corrected_moms, collapse = ", "), "\n") } - } - if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)){ + if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) { corrected_dads <- ped$ID[dad_indices[!is.na(dad_indices)]] ped$sex[dad_indices[!is.na(dad_indices)]] <- validation_results$male_var changes$corrected_dad_sex <- corrected_dads @@ -216,7 +215,6 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, if (verbose && length(corrected_dads) > 0) { cat("Corrected sex of dads for:", paste(corrected_dads, collapse = ", "), "\n") } - } } } @@ -235,7 +233,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- if(length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1 + new_entry$sex <- if (length(validation_results$male_var) > 0 && !is.na(validation_results$male_var)) validation_results$male_var else 1 new_entries <- rbind(new_entries, new_entry) } @@ -248,7 +246,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, new_entry$ID <- new_id new_entry$dadID <- NA new_entry$momID <- NA - new_entry$sex <- if(length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0 + new_entry$sex <- if (length(validation_results$female_var) > 0 && !is.na(validation_results$female_var)) validation_results$female_var else 0 new_entries <- rbind(new_entries, new_entry) } diff --git a/R/convertPedigree.R b/R/convertPedigree.R index ae76cab0..86e9970a 100644 --- a/R/convertPedigree.R +++ b/R/convertPedigree.R @@ -41,7 +41,7 @@ ped2com <- function(ped, component, save_rate_parlist = 100000 * save_rate, update_rate = 100, save_path = "checkpoint/", - adjBeta_method=NULL, + adjBeta_method = NULL, ...) { #------ # Checkpointing @@ -447,7 +447,7 @@ ped2cn <- function(ped, max.gen = 25, sparse = TRUE, verbose = FALSE, save_rate_gen = save_rate_gen, save_rate_parlist = save_rate_parlist, save_path = save_path, - ... + ... ) } #' Take a pedigree and turn it into an extended environmental relatedness matrix @@ -603,7 +603,7 @@ ped2ce <- function(ped, .adjDirect <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist,adjBeta_method, + parList, lens, save_rate_parlist, adjBeta_method, ...) { # Loop through each individual in the pedigree # Build the adjacency matrix for parent-child relationships @@ -619,7 +619,7 @@ ped2ce <- function(ped, iss <- c(mIDs$rID, dIDs$rID) jss <- c(mIDs$cID, dIDs$cID) } else if (component %in% c("common nuclear")) { - # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") + # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") # 1) Create a logical mask for only known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) @@ -637,7 +637,7 @@ ped2ce <- function(ped, # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j iss_list <- vector("list", length(groupList)) jss_list <- vector("list", length(groupList)) - counter <- 1 + counter <- 1 for (g in groupList) { k <- length(g) @@ -647,10 +647,10 @@ ped2ce <- function(ped, # v = each child repeated k times # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index - keep <- (v != w) # remove diagonal where v == w + keep <- (v != w) # remove diagonal where v == w iss_list[[counter]] <- v[keep] jss_list[[counter]] <- w[keep] counter <- counter + 1 @@ -658,21 +658,21 @@ ped2ce <- function(ped, } - iss <- unlist(iss_list, use.names = FALSE) - jss <- unlist(jss_list, use.names = FALSE) + iss <- unlist(iss_list, use.names = FALSE) + jss <- unlist(jss_list, use.names = FALSE) - # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, -# component = component, - # saveable = saveable, resume = resume, - # save_path = save_path, verbose = verbose, - # lastComputed = lastComputed, nr = nr, - # checkpoint_files = checkpoint_files, - # update_rate = update_rate, - # parList = parList, - # lens = lens, save_rate_parlist = save_rate_parlist, - # ...) + # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, + # component = component, + # saveable = saveable, resume = resume, + # save_path = save_path, verbose = verbose, + # lastComputed = lastComputed, nr = nr, + # checkpoint_files = checkpoint_files, + # update_rate = update_rate, + # parList = parList, + # lens = lens, save_rate_parlist = save_rate_parlist, + # ...) - # return(list_of_adjacency) + # return(list_of_adjacency) } else if (component %in% c("mitochondrial")) { mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) iss <- c(mIDs$rID) @@ -699,9 +699,9 @@ ped2ce <- function(ped, compute_parent_adjacency <- function(ped, component, adjacency_method = "direct", saveable, resume, - save_path, verbose=FALSE, - lastComputed=0, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist,adjBeta_method=NULL, + save_path, verbose = FALSE, + lastComputed = 0, nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist, adjBeta_method = NULL, ...) { if (adjacency_method == "loop") { if (lastComputed < nr) { # Original version @@ -761,21 +761,23 @@ compute_parent_adjacency <- function(ped, component, ) } } else if (adjacency_method == "beta") { - list_of_adjacency <- .adjBeta(ped = ped, - adjBeta_method = adjBeta_method, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ...) + list_of_adjacency <- .adjBeta( + ped = ped, + adjBeta_method = adjBeta_method, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) } else { stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") } @@ -809,22 +811,21 @@ isChild <- function(isChild_method, ped) { } -.adjBeta <- function(ped,component, - adjBeta_method=5, - parList=NULL, - lastComputed=0, - nr=NULL, - lens=NULL, - saveable=FALSE, - resume=FALSE, - save_path=NULL, - verbose=FALSE, - save_rate_parlist=NULL, - update_rate=NULL, - checkpoint_files=NULL, - ...){# 1) Pairwise compare mother IDs - if(adjBeta_method == 1){ - +.adjBeta <- function(ped, component, + adjBeta_method = 5, + parList = NULL, + lastComputed = 0, + nr = NULL, + lens = NULL, + saveable = FALSE, + resume = FALSE, + save_path = NULL, + verbose = FALSE, + save_rate_parlist = NULL, + update_rate = NULL, + checkpoint_files = NULL, + ...) { # 1) Pairwise compare mother IDs + if (adjBeta_method == 1) { # gets slow when data are bigger. much slower than indexed momMatch <- outer(ped$momID, ped$momID, FUN = "==") momMatch[is.na(momMatch)] <- FALSE @@ -837,15 +838,15 @@ isChild <- function(isChild_method, ped) { adj <- momMatch & dadMatch # 4) Extract indices where adj[i,j] is TRUE - w <- which(adj, arr.ind = TRUE) - # iss <- w[, 1] - # jss <- w[, 2] -# + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] + # list_of_adjacency <- list( iss = w[, 1], jss = w[, 2] ) - } else if(adjBeta_method == 2){ + } else if (adjBeta_method == 2) { # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) @@ -857,7 +858,7 @@ isChild <- function(isChild_method, ped) { pairCode <- match(pairLabel, unique(pairLabel)) # childVec are the row indices in 'ped' that have known parents - childVec <- which(mask) # length(childVec) = sum(mask) + childVec <- which(mask) # length(childVec) = sum(mask) # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" groupList <- split(childVec, pairCode) @@ -865,27 +866,27 @@ isChild <- function(isChild_method, ped) { # 5) For each group with >1 children, form pairwise adjacency i->j iss_list <- list() jss_list <- list() - counter <- 1 + counter <- 1 for (g in groupList) { if (length(g) > 1) { combos <- expand.grid(g, g, KEEP.OUT.ATTRS = FALSE) - combos <- combos[combos[,1] != combos[,2], , drop = FALSE] - iss_list[[counter]] <- combos[,1] - jss_list[[counter]] <- combos[,2] + combos <- combos[combos[, 1] != combos[, 2], , drop = FALSE] + iss_list[[counter]] <- combos[, 1] + jss_list[[counter]] <- combos[, 2] counter <- counter + 1 } } - # iss <- unlist(iss_list, use.names = FALSE) - # jss <- unlist(jss_list, use.names = FALSE) + # iss <- unlist(iss_list, use.names = FALSE) + # jss <- unlist(jss_list, use.names = FALSE) list_of_adjacency <- list( iss = unlist(iss_list, use.names = FALSE), jss = unlist(jss_list, use.names = FALSE) ) - } else if(adjBeta_method == 3){ + } else if (adjBeta_method == 3) { nr <- nrow(ped) -# terrible + # terrible # Define a scalar-checking function: f_check <- function(i, j) { # i, j are each single integers @@ -903,105 +904,102 @@ isChild <- function(isChild_method, ped) { adj <- outer(seq_len(nr), seq_len(nr), FUN = vf_check) # Extract which cells of adj are TRUE - w <- which(adj, arr.ind = TRUE) - # iss <- w[, 1] - # jss <- w[, 2] + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] list_of_adjacency <- list( iss = iss <- w[, 1], jss = jss <- w[, 2] ) -}else if(adjBeta_method == 4){ - - # 1) Create a logical mask for known parents - mask <- !is.na(ped$momID) & !is.na(ped$dadID) + } else if (adjBeta_method == 4) { + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) - # 2) Create a single string label for each known (momID, dadID) pair - pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + # 2) Create a single string label for each known (momID, dadID) pair + pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - pairCode <- match(pairLabel, unique(pairLabel)) + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + pairCode <- match(pairLabel, unique(pairLabel)) - # childVec are the row indices in 'ped' that have known parents - childVec <- which(mask) + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) - # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j - iss_list <- vector("list", length(groupList)) - jss_list <- vector("list", length(groupList)) - counter <- 1 + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 - for (g in groupList) { - k <- length(g) - if (k > 1) { - # We'll form all k^2 combos, then remove the diagonal i=j - # Instead of expand.grid, do rep() calls: + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: - # v = each child repeated k times - # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index - keep <- (v != w) # remove diagonal where v == w - iss_list[[counter]] <- v[keep] - jss_list[[counter]] <- w[keep] - counter <- counter + 1 + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } } - } - list_of_adjacency <- list( - iss = unlist(iss_list, use.names = FALSE), - jss = unlist(jss_list, use.names = FALSE) - ) - - } else if(adjBeta_method == 5){ + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else if (adjBeta_method == 5) { # 1) Create a logical mask for known parents mask <- !is.na(ped$momID) & !is.na(ped$dadID) - # 2) Create a single hash label for each known (momID, dadID) pair - #pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) - base <- max(ped$ID, na.rm = TRUE) + 1L - pairCode <- ped$momID[mask] + base * ped$dadID[mask] - - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - childVec <- which(mask) - - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) - - # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j - iss_list <- vector("list", length(groupList)) - jss_list <- vector("list", length(groupList)) - counter <- 1 - - for (g in groupList) { - k <- length(g) - if (k > 1) { - # We'll form all k^2 combos, then remove the diagonal i=j - # Instead of expand.grid, do rep() calls: - - # v = each child repeated k times - # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index - - keep <- (v != w) # remove diagonal where v == w - iss_list[[counter]] <- v[keep] - jss_list[[counter]] <- w[keep] - counter <- counter + 1 - } - } + # 2) Create a single hash label for each known (momID, dadID) pair + # pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + base <- max(ped$ID, na.rm = TRUE) + 1L + pairCode <- ped$momID[mask] + base * ped$dadID[mask] - list_of_adjacency <- list( - iss = unlist(iss_list, use.names = FALSE), - jss = unlist(jss_list, use.names = FALSE) - ) + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 - }else{ + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: - list_of_adjacency <- .adjIndexed(ped = ped, component = component, + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else { + list_of_adjacency <- .adjIndexed( + ped = ped, component = component, saveable = saveable, resume = resume, save_path = save_path, verbose = verbose, lastComputed = lastComputed, nr = nr, @@ -1009,7 +1007,6 @@ isChild <- function(isChild_method, ped) { update_rate = update_rate, parList = parList, lens = lens, save_rate_parlist = save_rate_parlist ) - } return(list_of_adjacency) } diff --git a/R/makeLinks.R b/R/makeLinks.R index 46e7d743..37c44274 100644 --- a/R/makeLinks.R +++ b/R/makeLinks.R @@ -40,7 +40,7 @@ com2links <- function( legacy = FALSE, outcome_name = "data", drop_upper_triangular = TRUE, - include_all_links_1ped=FALSE, + include_all_links_1ped = FALSE, ...) { # --- Input Validations and Preprocessing --- @@ -108,118 +108,118 @@ com2links <- function( } switch(matrix_case, - "ad" = process_one( - matrix = ad_ped_matrix, - rel_name = "addRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - include_all_links = include_all_links_1ped, - ... - ), - "mt" = process_one( - matrix = mit_ped_matrix, - rel_name = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - include_all_links = include_all_links_1ped, - ... - ), - "cn" = process_one( - matrix = cn_ped_matrix, - rel_name = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - include_all_links = include_all_links_1ped, - ... - ), - "ad-mt" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn" = process_two( - matrix1 = ad_ped_matrix, - name1 = "addRel", - matrix2 = cn_ped_matrix, - name2 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "cn-mt" = process_two( - matrix1 = cn_ped_matrix, - name1 = "cnuRel", - matrix2 = mit_ped_matrix, - name2 = "mitRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - "ad-cn-mt" = process_all_three( - mat1 = ad_ped_matrix, - name1 = "addRel", - mat2 = mit_ped_matrix, - name2 = "mitRel", - mat3 = cn_ped_matrix, - name3 = "cnuRel", - ids = ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc, - ... - ), - stop("Unsupported matrix combination") + "ad" = process_one( + matrix = ad_ped_matrix, + rel_name = "addRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + include_all_links = include_all_links_1ped, + ... + ), + "mt" = process_one( + matrix = mit_ped_matrix, + rel_name = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + include_all_links = include_all_links_1ped, + ... + ), + "cn" = process_one( + matrix = cn_ped_matrix, + rel_name = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + include_all_links = include_all_links_1ped, + ... + ), + "ad-mt" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn" = process_two( + matrix1 = ad_ped_matrix, + name1 = "addRel", + matrix2 = cn_ped_matrix, + name2 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "cn-mt" = process_two( + matrix1 = cn_ped_matrix, + name1 = "cnuRel", + matrix2 = mit_ped_matrix, + name2 = "mitRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + "ad-cn-mt" = process_all_three( + mat1 = ad_ped_matrix, + name1 = "addRel", + mat2 = mit_ped_matrix, + name2 = "mitRel", + mat3 = cn_ped_matrix, + name3 = "cnuRel", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc, + ... + ), + stop("Unsupported matrix combination") ) } #' Convert Sparse Relationship Matrices to Kinship Links for one Matrix @@ -231,112 +231,111 @@ com2links <- function( process_one <- function(matrix, rel_name, ids, nc, rel_pairs_file, writetodisk, write_buffer_size, drop_upper_triangular, update_rate, verbose, gc, - include_all_links=TRUE, ...) { + include_all_links = TRUE, ...) { if (include_all_links == FALSE) { - # Extract pointers and indices from the matrix. - newColPos <- matrix@p + 1L - iss <- matrix@i + 1L - x <- matrix@x - - # Initialize the related pairs file with headers. - df_relpairs <- initialize_empty_df(relNames = rel_name) - - if (writetodisk == TRUE) { - utils::write.table( - df_relpairs, - file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE - ) - - # Prepare an empty buffer for batching writes. - write_buffer <- list() - remove(df_relpairs) - } + # Extract pointers and indices from the matrix. + newColPos <- matrix@p + 1L + iss <- matrix@i + 1L + x <- matrix@x + + # Initialize the related pairs file with headers. + df_relpairs <- initialize_empty_df(relNames = rel_name) + + if (writetodisk == TRUE) { + utils::write.table( + df_relpairs, + file = rel_pairs_file, sep = ",", append = FALSE, row.names = FALSE + ) - # Process each column in the matrix. - for (j in 1L:nc) { + # Prepare an empty buffer for batching writes. + write_buffer <- list() + remove(df_relpairs) + } + # Process each column in the matrix. + for (j in 1L:nc) { ID2 <- ids[j] - # Extract column indices - ncp <- newColPos[j] - ncpp <- newColPos[j + 1L] - cond <- ncp < ncpp - if (cond) { - vv <- ncp:(ncpp - 1L) - issvv <- iss[vv] - } + # Extract column indices + ncp <- newColPos[j] + ncpp <- newColPos[j + 1L] + cond <- ncp < ncpp + if (cond) { + vv <- ncp:(ncpp - 1L) + issvv <- iss[vv] + } - # Create a unique set of row indices. - u <- sort(issvv) + # Create a unique set of row indices. + u <- sort(issvv) - # If any relationships exist for this individual, build the related pairs. - if (cond) { + # If any relationships exist for this individual, build the related pairs. + if (cond) { # Create a data frame with unique pairs. ID1 <- ids[u] tds <- data.frame(ID1 = ID1, ID2 = ID2) tds[[rel_name]] <- 0 - if (cond) { - tds[u %in% issvv, rel_name] <- x[vv] - } - if (drop_upper_triangular == TRUE) { - tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle - } - - # Write the batch to disk or accumulate in the data frame. - if (nrow(tds) > 0) { - if (writetodisk == TRUE) { - write_buffer[[length(write_buffer) + 1]] <- tds + if (cond) { + tds[u %in% issvv, rel_name] <- x[vv] + } + if (drop_upper_triangular == TRUE) { + tds <- tds[tds$ID1 <= tds$ID2, ] # or < if you want strictly lower triangle + } - if (length(write_buffer) >= write_buffer_size) { # Write in batches - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) - write_buffer <- list() + # Write the batch to disk or accumulate in the data frame. + if (nrow(tds) > 0) { + if (writetodisk == TRUE) { + write_buffer[[length(write_buffer) + 1]] <- tds + + if (length(write_buffer) >= write_buffer_size) { # Write in batches + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + write_buffer <- list() + } + } else { + df_relpairs <- rbind(df_relpairs, tds) } - } else { - df_relpairs <- rbind(df_relpairs, tds) } } + if (verbose && (j %% update_rate == 0L)) { + cat("Done with", j, "of", nc, "\n") + } } - if (verbose && (j %% update_rate == 0L)) { - cat("Done with", j, "of", nc, "\n") + # If not writing to disk, return the accumulated data frame. + if (writetodisk == FALSE) { + return(df_relpairs) + } else { + # Write any remaining buffered rows. + if (length(write_buffer) > 0) { + utils::write.table(do.call(rbind, write_buffer), + file = rel_pairs_file, + row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," + ) + } } - } - # If not writing to disk, return the accumulated data frame. - if (writetodisk == FALSE) { - return(df_relpairs) - } else { - # Write any remaining buffered rows. - if (length(write_buffer) > 0) { - utils::write.table(do.call(rbind, write_buffer), - file = rel_pairs_file, - row.names = FALSE, col.names = FALSE, append = TRUE, sep = "," - ) + if (gc == TRUE) { + remove(newColPos, iss, x) } - } - if (gc == TRUE) { - remove(newColPos, iss, x) - } - }else{ - matrix2= matrix(rep(1,length(ids)^2), - nrow = length(ids), - dimnames = list(ids, ids)) - process_two(matrix2=matrix, name2=rel_name, - matrix1=methods::as(matrix2,"CsparseMatrix"), - name1="phantom", - ids=ids, - nc = nc, - rel_pairs_file = rel_pairs_file, - writetodisk = writetodisk, - write_buffer_size = write_buffer_size, - drop_upper_triangular = drop_upper_triangular, - update_rate = update_rate, - verbose = verbose, - gc = gc) - - - + } else { + matrix2 <- matrix(rep(1, length(ids)^2), + nrow = length(ids), + dimnames = list(ids, ids) + ) + process_two( + matrix2 = matrix, name2 = rel_name, + matrix1 = methods::as(matrix2, "CsparseMatrix"), + name1 = "phantom", + ids = ids, + nc = nc, + rel_pairs_file = rel_pairs_file, + writetodisk = writetodisk, + write_buffer_size = write_buffer_size, + drop_upper_triangular = drop_upper_triangular, + update_rate = update_rate, + verbose = verbose, + gc = gc + ) } } @@ -553,8 +552,10 @@ process_two <- function( #' #' @return The validated and converted matrix. validate_and_convert_matrix <- function(mat, name, ensure_symmetric = FALSE, force_binary = FALSE) { - if (!inherits(mat, c("matrix", "dgCMatrix", "dsCMatrix","generalMatrix", - "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix",'CsparseMatrix'))) { + if (!inherits(mat, c( + "matrix", "dgCMatrix", "dsCMatrix", "generalMatrix", + "symmetricMatrix", "triangularMatrix", "dsyMatrix", "dspMatrix", "dsyMatrix", "CsparseMatrix" + ))) { stop(paste0("The '", name, "' must be a matrix or generalMatrix")) } if (!inherits(mat, "generalMatrix")) { diff --git a/R/makeLinkslegacy.R b/R/makeLinkslegacy.R index 27a6e45a..23265d05 100644 --- a/R/makeLinkslegacy.R +++ b/R/makeLinkslegacy.R @@ -628,4 +628,3 @@ com2links.og <- function( } return(NULL) } - diff --git a/R/readGedcom.R b/R/readGedcom.R index 29462ad2..7181fac4 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -57,7 +57,6 @@ readGedcom <- function(file_path, update_rate = 1000, post_process = TRUE, ...) { - # Ensure the file exists and read all lines. if (!file.exists(file_path)) { stop("File does not exist: ", file_path) @@ -73,15 +72,19 @@ readGedcom <- function(file_path, # List of variables to initialize all_var_names <- unlist(list( identifiers = c("id", "momID", "dadID"), - names = c("name", "name_given", "name_given_pieces", "name_surn", "name_surn_pieces", "name_marriedsurn", - "name_nick", "name_npfx", "name_nsfx"), - sex = c("sex"), - birth = c("birth_date", "birth_lat", "birth_long", "birth_place"), - death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"), - attributes = c("attribute_caste", "attribute_children", "attribute_description", "attribute_education", - "attribute_idnumber", "attribute_marriages", "attribute_nationality", "attribute_occupation", - "attribute_property", "attribute_religion", "attribute_residence", "attribute_ssn", - "attribute_title"), + names = c( + "name", "name_given", "name_given_pieces", "name_surn", "name_surn_pieces", "name_marriedsurn", + "name_nick", "name_npfx", "name_nsfx" + ), + sex = c("sex"), + birth = c("birth_date", "birth_lat", "birth_long", "birth_place"), + death = c("death_caus", "death_date", "death_lat", "death_long", "death_place"), + attributes = c( + "attribute_caste", "attribute_children", "attribute_description", "attribute_education", + "attribute_idnumber", "attribute_marriages", "attribute_nationality", "attribute_occupation", + "attribute_property", "attribute_religion", "attribute_residence", "attribute_ssn", + "attribute_title" + ), relationships = c("FAMC", "FAMS") ), use.names = FALSE) @@ -90,8 +93,9 @@ readGedcom <- function(file_path, # Parse each individual block into a record (a named list) records <- lapply(blocks, parseIndividualBlock, - pattern_rows = pattern_rows, - all_var_names = all_var_names, verbose = verbose) + pattern_rows = pattern_rows, + all_var_names = all_var_names, verbose = verbose + ) # Remove any NULLs (if a block did not contain an individual id) records <- Filter(Negate(is.null), records) @@ -135,7 +139,9 @@ readGedcom <- function(file_path, #' @return A list of character vectors, each representing one individual. splitIndividuals <- function(lines, verbose = FALSE) { indi_idx <- grep("@ INDI", lines) - if (length(indi_idx) == 0) return(list()) + if (length(indi_idx) == 0) { + return(list()) + } blocks <- list() for (i in seq_along(indi_idx)) { @@ -194,7 +200,7 @@ parseIndividualBlock <- function(block, pattern_rows, all_var_names, verbose = F # Process birth and death events by consuming multiple lines. if (grepl(" BIRT", line) && pattern_rows$num_birt_rows > 0) { record <- processEventLine("birth", block, i, record, pattern_rows) - i <- i + 1 # Skip further processing of this line. + i <- i + 1 # Skip further processing of this line. next } if (grepl(" DEAT", line) && pattern_rows$num_deat_rows > 0) { @@ -214,50 +220,62 @@ parseIndividualBlock <- function(block, pattern_rows, all_var_names, verbose = F list(tag = "_MARNM", field = "name_marriedsurn", mode = "replace") ) out <- applyTagMappings(line, record, pattern_rows, name_piece_mappings) - if (out$matched) { record <- out$record - i <- i + 1 - next } + if (out$matched) { + record <- out$record + i <- i + 1 + next + } # Process attribute tags. attribute_mappings <- list( - list(tag = "SEX", field = "sex", mode = "replace"), + list(tag = "SEX", field = "sex", mode = "replace"), list(tag = "CAST", field = "attribute_caste", mode = "replace"), list(tag = "DSCR", field = "attribute_description", mode = "replace"), list(tag = "EDUC", field = "attribute_education", mode = "replace"), list(tag = "IDNO", field = "attribute_idnumber", mode = "replace"), list(tag = "NATI", field = "attribute_nationality", mode = "replace"), list(tag = "NCHI", field = "attribute_children", mode = "replace"), - list(tag = "NMR", field = "attribute_marriages", mode = "replace"), + list(tag = "NMR", field = "attribute_marriages", mode = "replace"), list(tag = "OCCU", field = "attribute_occupation", mode = "replace"), list(tag = "PROP", field = "attribute_property", mode = "replace"), list(tag = "RELI", field = "attribute_religion", mode = "replace"), list(tag = "RESI", field = "attribute_residence", mode = "replace"), - list(tag = "SSN", field = "attribute_ssn", mode = "replace"), + list(tag = "SSN", field = "attribute_ssn", mode = "replace"), list(tag = "TITL", field = "attribute_title", mode = "replace") ) out <- applyTagMappings(line, record, pattern_rows, attribute_mappings) - if (out$matched) { record <- out$record - i <- i + 1 - next } + if (out$matched) { + record <- out$record + i <- i + 1 + next + } # Process relationship tags, using a custom extractor. relationship_mappings <- list( - list(tag = "FAMC", field = "FAMC", mode = "append", - extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")), - list(tag = "FAMS", field = "FAMS", mode = "append", - extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)")) + list( + tag = "FAMC", field = "FAMC", mode = "append", + extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)") + ), + list( + tag = "FAMS", field = "FAMS", mode = "append", + extractor = function(x) stringr::str_extract(x, "(?<=@.)\\d*(?=@)") + ) ) out <- applyTagMappings(line, record, pattern_rows, relationship_mappings) - if (out$matched) { record <- out$record - i <- i + 1 - next } + if (out$matched) { + record <- out$record + i <- i + 1 + next + } # Optionally print progress for long records. i <- i + 1 } # If the record has no ID, return NULL. - if (is.na(record$id)) return(NULL) + if (is.na(record$id)) { + return(NULL) + } return(record) } @@ -291,16 +309,16 @@ parseNameLine <- function(line, record) { processEventLine <- function(event, block, i, record, pattern_rows) { n_lines <- length(block) if (event == "birth") { - if (i + 1 <= n_lines) record$birth_date <- extract_info(block[i+1], "DATE") - if (i + 2 <= n_lines) record$birth_place <- extract_info(block[i+2], "PLAC") - if (i + 4 <= n_lines) record$birth_lat <- extract_info(block[i+4], "LATI") - if (i + 5 <= n_lines) record$birth_long <- extract_info(block[i+5], "LONG") + if (i + 1 <= n_lines) record$birth_date <- extract_info(block[i + 1], "DATE") + if (i + 2 <= n_lines) record$birth_place <- extract_info(block[i + 2], "PLAC") + if (i + 4 <= n_lines) record$birth_lat <- extract_info(block[i + 4], "LATI") + if (i + 5 <= n_lines) record$birth_long <- extract_info(block[i + 5], "LONG") } else if (event == "death") { - if (i + 1 <= n_lines) record$death_date <- extract_info(block[i+1], "DATE") - if (i + 2 <= n_lines) record$death_place <- extract_info(block[i+2], "PLAC") - if (i + 3 <= n_lines) record$death_caus <- extract_info(block[i+3], "CAUS") - if (i + 4 <= n_lines) record$death_lat <- extract_info(block[i+4], "LATI") - if (i + 5 <= n_lines) record$death_long <- extract_info(block[i+5], "LONG") + if (i + 1 <= n_lines) record$death_date <- extract_info(block[i + 1], "DATE") + if (i + 2 <= n_lines) record$death_place <- extract_info(block[i + 2], "PLAC") + if (i + 3 <= n_lines) record$death_caus <- extract_info(block[i + 3], "CAUS") + if (i + 4 <= n_lines) record$death_lat <- extract_info(block[i + 4], "LATI") + if (i + 5 <= n_lines) record$death_long <- extract_info(block[i + 5], "LONG") } return(record) } @@ -322,7 +340,8 @@ applyTagMappings <- function(line, record, pattern_rows, tag_mappings) { for (mapping in tag_mappings) { extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor result <- process_tag(mapping$tag, mapping$field, pattern_rows, line, record, - extractor = extractor, mode = mapping$mode) + extractor = extractor, mode = mapping$mode + ) record <- result$vars if (result$matched) { return(list(record = record, matched = TRUE)) @@ -413,8 +432,8 @@ process_tag <- function(tag, field_name, pattern_rows, line, vars, count_name <- paste0("num_", tolower(tag), "_rows") matched <- FALSE if (!is.null(pattern_rows[[count_name]]) && - pattern_rows[[count_name]] > 0 && - grepl(paste0(" ", tag), line)) { + pattern_rows[[count_name]] > 0 && + grepl(paste0(" ", tag), line)) { value <- if (is.null(extractor)) { extract_info(line, tag) } else { diff --git a/R/readGedcomlegacy.R b/R/readGedcomlegacy.R index e7e04a82..8221197a 100644 --- a/R/readGedcomlegacy.R +++ b/R/readGedcomlegacy.R @@ -49,14 +49,14 @@ #' - `FAMS`: ID(s) of the family where the individual is a spouse #' @keywords internal readGedcom.legacy <- function(file_path, - verbose = FALSE, - add_parents = TRUE, - remove_empty_cols = TRUE, - combine_cols = TRUE, - skinny = FALSE, - update_rate = 1000, - post_process = TRUE, - ...) { + verbose = FALSE, + add_parents = TRUE, + remove_empty_cols = TRUE, + combine_cols = TRUE, + skinny = FALSE, + update_rate = 1000, + post_process = TRUE, + ...) { # Checks if (!file.exists(file_path)) stop("File does not exist: ", file_path) if (verbose) { @@ -302,12 +302,12 @@ readGedcom.legacy <- function(file_path, warning("The number of people found in the processed file does not match the number of individuals raw data") } - if(post_process){ + if (post_process) { if (verbose) { print("Post-processing data frame") } # Remove the first row (empty) -df_temp <- postProcessGedcom.legacy( + df_temp <- postProcessGedcom.legacy( df_temp = df_temp, remove_empty_cols = remove_empty_cols, combine_cols = combine_cols, @@ -315,7 +315,6 @@ df_temp <- postProcessGedcom.legacy( skinny = skinny, verbose = verbose ) - } return(df_temp) @@ -328,12 +327,11 @@ df_temp <- postProcessGedcom.legacy( #' @return A data frame with processed information. postProcessGedcom.legacy <- function(df_temp, - remove_empty_cols = TRUE, - combine_cols = TRUE, - add_parents = TRUE, - skinny = TRUE, - verbose = FALSE -){ + remove_empty_cols = TRUE, + combine_cols = TRUE, + add_parents = TRUE, + skinny = TRUE, + verbose = FALSE) { # Add mom and dad ids if (add_parents) { if (verbose) { @@ -342,28 +340,27 @@ postProcessGedcom.legacy <- function(df_temp, df_temp <- processParents.legacy(df_temp, datasource = "gedcom") } -if (combine_cols) { - df_temp <- collapseNames.legacy(verbose = verbose, df_temp = df_temp) -} + if (combine_cols) { + df_temp <- collapseNames.legacy(verbose = verbose, df_temp = df_temp) + } -if (remove_empty_cols) { - # Remove empty columns - if (verbose) { - print("Removing empty columns") + if (remove_empty_cols) { + # Remove empty columns + if (verbose) { + print("Removing empty columns") + } + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] } - df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] -} -if (skinny) { - if (verbose) { - print("Slimming down the data frame") + if (skinny) { + if (verbose) { + print("Slimming down the data frame") + } + df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] + df_temp$FAMC <- NULL + df_temp$FAMS <- NULL } - df_temp <- df_temp[, colSums(is.na(df_temp)) < nrow(df_temp)] - df_temp$FAMC <- NULL - df_temp$FAMS <- NULL -} - -return(df_temp) + return(df_temp) } #' Create a mapping of family IDs to parent IDs @@ -570,7 +567,7 @@ countPatternRows.legacy <- function(file) { #' @keywords internal #' process_tag.legacy <- function(tag, field_name, pattern_rows, line, vars, - extractor = NULL, mode = "replace") { + extractor = NULL, mode = "replace") { count_name <- paste0("num_", tolower(tag), "_rows") matched <- FALSE if (!is.null(pattern_rows[[count_name]]) && @@ -622,4 +619,3 @@ collapseNames.legacy <- function(verbose, df_temp) { } return(df_temp) } - diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index 8fc88915..65f35ff8 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -191,11 +191,13 @@ buildBetweenGenerations <- function(df_Fam, Ngen, sizeGens, verbose, marR, sexR, SingleM <- sum(df_Ngen$sex == "M" & is.na(df_Ngen$spID)) CoupleM <- N_LinkedMale - SingleM - df_Fam[df_Fam$gen == i, ] <- markPotentialChildren(df_Ngen = df_Ngen, - i = i, - Ngen = Ngen, - sizeGens = sizeGens, - CoupleF = CoupleF) + df_Fam[df_Fam$gen == i, ] <- markPotentialChildren( + df_Ngen = df_Ngen, + i = i, + Ngen = Ngen, + sizeGens = sizeGens, + CoupleF = CoupleF + ) if (verbose) { print( "Step 2.2: mark a group of potential parents in the i-1 th generation" diff --git a/data-raw/benchged.R b/data-raw/benchged.R index c0d0eef9..18a9fc47 100644 --- a/data-raw/benchged.R +++ b/data-raw/benchged.R @@ -26,6 +26,6 @@ print(benchmark_results) # Optional: Save results to CSV for later analysis write.csv(summary(benchmark_results), - "benchmark_results.csv", - row.names = FALSE + "benchmark_results.csv", + row.names = FALSE ) diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R index 4a615af2..2add0e4f 100644 --- a/data-raw/benchmark.R +++ b/data-raw/benchmark.R @@ -43,7 +43,8 @@ ped3 <- ped3 %>% momID = momID + max(ped3$ID, na.rm = TRUE), dadID = dadID + max(ped3$ID, na.rm = TRUE), spID = spID + max(ped3$ID, na.rm = TRUE) - ) %>% rbind(ped3) + ) %>% + rbind(ped3) set.seed(1151) kpc <- 2 @@ -62,243 +63,254 @@ ped <- rbind(ped, ped2) ped <- rbind(ped, ped3) ped <- rbind(ped, ped4) -if(TRUE){ -# Define parameters -component <- "common nuclear"#"additive" # Change this to test different components -saveable <- FALSE # Disable saving to avoid disk I/O slowing down benchmarking -resume <- FALSE # Disable resume to ensure full fresh runs -save_path <- "checkpoint/" -verbose <- FALSE # Turn off verbose for cleaner output -update_rate <- 100 -save_rate_parlist <- 1000 -#method_approach <- 1 -# Run benchmarking for "loop" and "indexed" methods in ped2com() -benchmark_results <- microbenchmark( -# loop_big = { -# ped2com( -# ped = ped, -# component = component, -# adjacency_method = "loop", # Test "loop" method -# saveable = saveable, -# resume = resume, -# save_path = save_path, -# verbose = verbose, -# update_rate = update_rate, -# save_rate_parlist = save_rate_parlist -# ) -# }, - indexed_big = { - ped2com( - ped = ped, - component = component, - adjacency_method = "indexed", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - save_rate_parlist = save_rate_parlist - ) - }, - direct4_big = { - ped2com( - ped = ped, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 4, - save_rate_parlist = save_rate_parlist - ) - }, - direct2_big = { - ped2com( - ped = ped, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 2, - save_rate_parlist = save_rate_parlist - ) - }, - direct5_big = { - ped2com( - ped = ped, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 5, - save_rate_parlist = save_rate_parlist - ) - }, -# loop = { -# ped2com( -# ped = ped2, -# component = component, -# adjacency_method = "loop", # Test "loop" method -# saveable = saveable, -# resume = resume, -# save_path = save_path, -# verbose = verbose, -# update_rate = update_rate, -# save_rate_parlist = save_rate_parlist -# -# ) -# }, - indexed = { - ped2com( - ped = ped2, - component = component, - adjacency_method = "indexed", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - save_rate_parlist = save_rate_parlist - ) - }, - direct4 = { - ped2com( - ped = ped2, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 4, - save_rate_parlist = save_rate_parlist - ) - }, - direct2 = { - ped2com( - ped = ped2, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 2, - save_rate_parlist = save_rate_parlist - ) - }, - direct5 = { - ped2com( - ped = ped2, - component = component, - adjacency_method = "direct", # Test "indexed" method - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - update_rate = update_rate, - method_approach = 5, - save_rate_parlist = save_rate_parlist - ) - }, - times = 10 # Run each method 100 times -) +if (TRUE) { + # Define parameters + component <- "common nuclear" # "additive" # Change this to test different components + saveable <- FALSE # Disable saving to avoid disk I/O slowing down benchmarking + resume <- FALSE # Disable resume to ensure full fresh runs + save_path <- "checkpoint/" + verbose <- FALSE # Turn off verbose for cleaner output + update_rate <- 100 + save_rate_parlist <- 1000 + # method_approach <- 1 + # Run benchmarking for "loop" and "indexed" methods in ped2com() + benchmark_results <- microbenchmark( + # loop_big = { + # ped2com( + # ped = ped, + # component = component, + # adjacency_method = "loop", # Test "loop" method + # saveable = saveable, + # resume = resume, + # save_path = save_path, + # verbose = verbose, + # update_rate = update_rate, + # save_rate_parlist = save_rate_parlist + # ) + # }, + indexed_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "indexed", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + save_rate_parlist = save_rate_parlist + ) + }, + direct4_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 4, + save_rate_parlist = save_rate_parlist + ) + }, + direct2_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 2, + save_rate_parlist = save_rate_parlist + ) + }, + direct5_big = { + ped2com( + ped = ped, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 5, + save_rate_parlist = save_rate_parlist + ) + }, + # loop = { + # ped2com( + # ped = ped2, + # component = component, + # adjacency_method = "loop", # Test "loop" method + # saveable = saveable, + # resume = resume, + # save_path = save_path, + # verbose = verbose, + # update_rate = update_rate, + # save_rate_parlist = save_rate_parlist + # + # ) + # }, + indexed = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "indexed", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + save_rate_parlist = save_rate_parlist + ) + }, + direct4 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 4, + save_rate_parlist = save_rate_parlist + ) + }, + direct2 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 2, + save_rate_parlist = save_rate_parlist + ) + }, + direct5 = { + ped2com( + ped = ped2, + component = component, + adjacency_method = "direct", # Test "indexed" method + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + update_rate = update_rate, + method_approach = 5, + save_rate_parlist = save_rate_parlist + ) + }, + times = 10 # Run each method 100 times + ) -summary(benchmark_results) + summary(benchmark_results) -df_plot <- benchmark_results %>% mutate(size = case_when(expr %in% c("loop", "indexed", "direct4", "direct2", "direct5") ~ "small", - expr %in% c("loop_big", "indexed_big", "direct4_big", "direct2_big", "direct5_big") ~ "big"), - method = case_when(expr %in% c("loop", "loop_big") ~ "loop", - expr %in% c("indexed", "indexed_big") ~ "indexed", - expr %in% c("direct4", "direct4_big") ~ "direct4", - expr %in% c("direct2", "direct2_big") ~ "direct2", - expr %in% c("direct5", "direct5_big") ~ "direct5"))# %>% + df_plot <- benchmark_results %>% mutate( + size = case_when( + expr %in% c("loop", "indexed", "direct4", "direct2", "direct5") ~ "small", + expr %in% c("loop_big", "indexed_big", "direct4_big", "direct2_big", "direct5_big") ~ "big" + ), + method = case_when( + expr %in% c("loop", "loop_big") ~ "loop", + expr %in% c("indexed", "indexed_big") ~ "indexed", + expr %in% c("direct4", "direct4_big") ~ "direct4", + expr %in% c("direct2", "direct2_big") ~ "direct2", + expr %in% c("direct5", "direct5_big") ~ "direct5" + ) + ) # %>% -# set indexed as reference level -df_plot$method <- factor(df_plot$method, levels = c("indexed", "loop","direct2", "direct4", "direct5")) -df_plot$size <- factor(df_plot$size, levels = c("small", "big")) + # set indexed as reference level + df_plot$method <- factor(df_plot$method, levels = c("indexed", "loop", "direct2", "direct4", "direct5")) + df_plot$size <- factor(df_plot$size, levels = c("small", "big")) -lm(time ~ method*size,data=df_plot) %>% - summary() %>% print() + lm(time ~ method * size, data = df_plot) %>% + summary() %>% + print() -p<-ggplot(df_plot, aes(x = method, y = time)) + - geom_boxplot(aes(fill = size), alpha = 0.5) + - labs(title = "Benchmarking Results", - x = "Method", - y = "Time (seconds)") + - theme_minimal() + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) -# Print benchmark results + p <- ggplot(df_plot, aes(x = method, y = time)) + + geom_boxplot(aes(fill = size), alpha = 0.5) + + labs( + title = "Benchmarking Results", + x = "Method", + y = "Time (seconds)" + ) + + theme_minimal() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Print benchmark results -p -print(benchmark_results) + p + print(benchmark_results) -# Optional: Save results to CSV for later analysis -write.csv(summary(benchmark_results), - "benchmark_results.csv", - row.names = FALSE -) -# Print benchmark + # Optional: Save results to CSV for later analysis + write.csv(summary(benchmark_results), + "benchmark_results.csv", + row.names = FALSE + ) + # Print benchmark } -if(FALSE){ -verbose=FALSE -ad_ped_matrix <- ped2com(ped, component = "additive", adjacency_method = "direct", sparse = TRUE) -mit_ped_matrix <- ped2com(ped, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) -cn_ped_matrix <- ped2com(ped, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) -benchmark_results <- microbenchmark( - beta = { - com2links.beta( - ad_ped_matrix = ad_ped_matrix, - mit_ped_matrix = mit_ped_matrix, - cn_ped_matrix = cn_ped_matrix, - writetodisk = TRUE, - verbose = verbose -); file.remove("dataRelatedPairs.csv") - }, regular = { - com2links( - ad_ped_matrix = ad_ped_matrix, - mit_ped_matrix = mit_ped_matrix, - cn_ped_matrix = cn_ped_matrix, - writetodisk = TRUE, - verbose = verbose - ); file.remove("dataRelatedPairs.csv") - }, legacy = { - com2links( - ad_ped_matrix = ad_ped_matrix, - mit_ped_matrix = mit_ped_matrix, - cn_ped_matrix = cn_ped_matrix, - verbose = verbose, - legacy = TRUE - ); file.remove("dataRelatedPairs.csv") - }, - - times = 100 # Run each method 100 times -) +if (FALSE) { + verbose <- FALSE + ad_ped_matrix <- ped2com(ped, component = "additive", adjacency_method = "direct", sparse = TRUE) + mit_ped_matrix <- ped2com(ped, component = "mitochondrial", adjacency_method = "direct", sparse = TRUE) + cn_ped_matrix <- ped2com(ped, component = "common nuclear", adjacency_method = "indexed", sparse = TRUE) + benchmark_results <- microbenchmark( + beta = { + com2links.beta( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + writetodisk = TRUE, + verbose = verbose + ) + file.remove("dataRelatedPairs.csv") + }, regular = { + com2links( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + writetodisk = TRUE, + verbose = verbose + ) + file.remove("dataRelatedPairs.csv") + }, legacy = { + com2links( + ad_ped_matrix = ad_ped_matrix, + mit_ped_matrix = mit_ped_matrix, + cn_ped_matrix = cn_ped_matrix, + verbose = verbose, + legacy = TRUE + ) + file.remove("dataRelatedPairs.csv") + }, + times = 100 # Run each method 100 times + ) -summary(benchmark_results) + summary(benchmark_results) -lm(benchmark_results$time ~ benchmark_results$expr) %>% - summary() -# Print benchmark results -print(benchmark_results) + lm(benchmark_results$time ~ benchmark_results$expr) %>% + summary() + # Print benchmark results + print(benchmark_results) -# Optional: Save results to CSV for later analysis -write.csv(summary(benchmark_results), - "benchmark_results.csv", - row.names = FALSE -) + # Optional: Save results to CSV for later analysis + write.csv(summary(benchmark_results), + "benchmark_results.csv", + row.names = FALSE + ) } diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R index a4b747e1..a011c334 100644 --- a/tests/testthat/test-convertPedigree.R +++ b/tests/testthat/test-convertPedigree.R @@ -304,8 +304,8 @@ test_that("adjacency_method 'indexed', 'loop', and direct produce the same resu ped_common_indexed <- ped2com(hazard, component = "common nuclear", adjacency_method = "indexed") ped_common_loop <- ped2com(hazard, component = "common nuclear", adjacency_method = "loop") ped_common_direct <- ped2com(hazard, component = "common nuclear", adjacency_method = "direct") - ped_common_adjBeta_1 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta",adjBeta_method = adjBeta_method_2) - ped_common_adjBeta_2 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta",adjBeta_method = adjBeta_method_3) + ped_common_adjBeta_1 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta", adjBeta_method = adjBeta_method_2) + ped_common_adjBeta_2 <- ped2com(hazard, component = "common nuclear", adjacency_method = "beta", adjBeta_method = adjBeta_method_3) expect_equal(ped_common_indexed, ped_common_loop, tolerance = tolerance) expect_equal(ped_common_loop, ped_common_direct, tolerance = tolerance) diff --git a/tests/testthat/test-makeLinks.R b/tests/testthat/test-makeLinks.R index e23b2390..05fc58fd 100644 --- a/tests/testthat/test-makeLinks.R +++ b/tests/testthat/test-makeLinks.R @@ -86,7 +86,7 @@ test_that("com2links processes creates same length for cn with 3, 2, and 1 matri expect_true(all(result2$addRel >= 0)) expect_true(all(result2$cnuRel >= 0)) - expect_equal(result3$cnuRel,result2$cnuRel) + expect_equal(result3$cnuRel, result2$cnuRel) result1 <- com2links(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) result1_legacy <- com2links.legacy(cn_ped_matrix = cn_ped_matrix, writetodisk = FALSE) @@ -98,12 +98,11 @@ test_that("com2links processes creates same length for cn with 3, 2, and 1 matri expect_equal(ncol(result1_legacy), 3) # Expect ID1, ID2, and cnuRel expect_true(all(result1$cnuRel >= 0)) expect_true(all(result1_legacy$cnuRel >= 0)) - expect_equal(result3$cnuRel[result3$cnuRel==1],result1$cnuRel[result1$cnuRel==1]) - expect_equal(result3$cnuRel[result3$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) - expect_equal(result2$cnuRel[result2$cnuRel==1],result1$cnuRel[result1$cnuRel==1]) - expect_equal(result2$cnuRel[result2$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) - expect_equal(result1$cnuRel[result1$cnuRel==1],result1_legacy$cnuRel[result1_legacy$cnuRel==1]) - + expect_equal(result3$cnuRel[result3$cnuRel == 1], result1$cnuRel[result1$cnuRel == 1]) + expect_equal(result3$cnuRel[result3$cnuRel == 1], result1_legacy$cnuRel[result1_legacy$cnuRel == 1]) + expect_equal(result2$cnuRel[result2$cnuRel == 1], result1$cnuRel[result1$cnuRel == 1]) + expect_equal(result2$cnuRel[result2$cnuRel == 1], result1_legacy$cnuRel[result1_legacy$cnuRel == 1]) + expect_equal(result1$cnuRel[result1$cnuRel == 1], result1_legacy$cnuRel[result1_legacy$cnuRel == 1]) }) test_that("com2links written version matchs", { data(hazard) @@ -327,9 +326,8 @@ test_that("com2links handles large batch writing correctly", { expect_true(file.exists(temp_file)) written_data <- read.csv(temp_file) - expect_true(nrow(written_data) == 155) # Ensuring batch writing logic works + expect_true(nrow(written_data) == 155) # Ensuring batch writing logic works expect_true(file.remove(temp_file)) - }) test_that("com2links garbage collection does not affect output, using two components", { diff --git a/tests/testthat/test-plotPedigree.R b/tests/testthat/test-plotPedigree.R index 2c5199d7..ce4697c4 100644 --- a/tests/testthat/test-plotPedigree.R +++ b/tests/testthat/test-plotPedigree.R @@ -34,11 +34,9 @@ test_that("pedigree plots correctly with affected variables", { # file.remove("Rplots.pdf") test_that("pedigree errs when affected variables named", { -data(inbreeding) + data(inbreeding) expect_error(plotPedigree(data, verbose = TRUE, affected = "affected")) - - }) @@ -46,6 +44,4 @@ test_that("pedigree plots multiple families", { data(inbreeding) expect_output(plotPedigree(inbreeding, verbose = TRUE)) - - }) diff --git a/tests/testthat/test-readPedigrees.R b/tests/testthat/test-readPedigrees.R index b48bf74a..6adb4687 100644 --- a/tests/testthat/test-readPedigrees.R +++ b/tests/testthat/test-readPedigrees.R @@ -231,4 +231,3 @@ test_that("readGedcom handles incomplete individual records gracefully", { unlink(temp_file) }) - diff --git a/tests/testthat/test-readWikiTree.R b/tests/testthat/test-readWikiTree.R index d73e5810..166539d9 100644 --- a/tests/testthat/test-readWikiTree.R +++ b/tests/testthat/test-readWikiTree.R @@ -1,4 +1,3 @@ - # readWikifamilytree test_that("readWikifamilytree reads a string correctly", { diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd index 54ecea5f..c83d9241 100644 --- a/vignettes/ASOIAF.Rmd +++ b/vignettes/ASOIAF.Rmd @@ -165,7 +165,5 @@ This code creates new IDs for individuals with one known parent and a missing ot We can now visualize the repaired pedigree using the `plotPedigree()` function. This function generates a plot of the pedigree, with individuals colored based on their affected status. In this case, we highlight Jon and Daenerys as "affected" individuals. Otherwise they would be difficult to distinguish from the rest of the pedigree. ```{r, message=FALSE, warning=FALSE} - - plotPedigree(df_repaired, affected = df_repaired$affected, verbose = FALSE) ``` diff --git a/vignettes/partial.Rmd b/vignettes/partial.Rmd index 52a9e45e..50009cfc 100644 --- a/vignettes/partial.Rmd +++ b/vignettes/partial.Rmd @@ -71,16 +71,18 @@ corrplot(as.matrix(ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Additive component - Classic method", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(ped_add_partial_complete), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, title = "Additive component - Partial parent method", + is.corr = FALSE, title = "Additive component - Partial parent method", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` @@ -89,9 +91,10 @@ To verify this, we subtract one matrix from the other and calculate RMSE. The di ```{r,warning=FALSE} corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)), method = "color", type = "lower", col.lim = c(0, 1), - is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + is.corr = FALSE, order = "hclust", + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` @@ -131,14 +134,16 @@ corrplot(as.matrix(ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic (mother removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial (mother removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` @@ -156,8 +161,9 @@ corrplot(as.matrix(ped_add_classic_complete) - as.matrix(ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) sqrt(mean((ped_add_classic_complete - ped_add_classic)^2)) ``` @@ -169,8 +175,9 @@ corrplot(as.matrix(ped_add_classic_complete - ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) sqrt(mean((ped_add_classic_complete - ped_add_partial)^2)) ``` @@ -214,15 +221,17 @@ corrplot(as.matrix(ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic (father removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial (father removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` Again, we compare to the true matrix from the complete pedigree: @@ -233,8 +242,9 @@ corrplot(as.matrix(ped_add_classic_complete - ped_add_classic), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) sqrt(mean((ped_add_classic_complete - ped_add_classic)^2)) ``` @@ -245,8 +255,9 @@ corrplot(as.matrix(ped_add_classic_complete - ped_add_partial), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) sqrt(mean((ped_add_classic_complete - ped_add_partial)^2)) ``` @@ -389,36 +400,41 @@ corrplot(as.matrix(fam1$ped_add_classic_complete), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic - Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_classic_mom), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic - Mom Missing", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_partial_mom), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial - Mom Missing", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic - Dad Missing", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial - Dad Missing", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` @@ -429,29 +445,33 @@ corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic Mom Diff from Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial Mom Diff from Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic Dad Diff from Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad), method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Partial Dad Diff from Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +) ``` These plots show how each method responds to missing data, and whether it maintains consistency with the complete pedigree. We observe that the partial parent method typically introduces smaller deviations. If desired, this same diagnostic can be repeated for additional families, such as inbreeding_list[[2]]. diff --git a/vignettes/partial.html b/vignettes/partial.html index 9a2e5535..51f5d3f5 100644 --- a/vignettes/partial.html +++ b/vignettes/partial.html @@ -396,25 +396,28 @@

Hazard Data Example

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Additive component - Classic method", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0)) + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 
 corrplot(as.matrix(ped_add_partial_complete),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE, title = "Additive component - Partial parent method", 
+  is.corr = FALSE, title = "Additive component - Partial parent method",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

To verify this, we subtract one matrix from the other and calculate RMSE. The difference should be numerically zero. Indeed, it is 0.

corrplot((as.matrix(ped_add_classic_complete) - as.matrix(ped_add_partial_complete)),
   method = "color", type = "lower", col.lim = c(0, 1),
-  is.corr = FALSE,  order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ is.corr = FALSE, order = "hclust", + tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

@@ -445,15 +448,17 @@

Introducing Missingness: Remove a Parent

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic (mother removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

corrplot(as.matrix(ped_add_partial),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial (mother removed)",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

We quantify the overall matrix difference:

sqrt(mean((as.matrix(ped_add_classic) - as.matrix(ped_add_partial))^2))
@@ -465,8 +470,9 @@ 

Introducing Missingness: Remove a Parent

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
@@ -477,8 +483,9 @@ 

Introducing Missingness: Remove a Parent

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
@@ -515,24 +522,27 @@ 

Removing the Father Instead

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic (father removed)", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial (father removed)",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

Again, we compare to the true matrix from the complete pedigree:

corrplot(as.matrix(ped_add_classic_complete - ped_add_classic),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE,
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 sqrt(mean((ped_add_classic_complete - ped_add_classic)^2))
@@ -543,8 +553,9 @@ 

Removing the Father Instead

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 sqrt(mean((ped_add_classic_complete - ped_add_partial)^2))
@@ -757,72 +768,81 @@ 

Example: Family 1

method = "color", type = "lower", col.lim = c(0, 1), is.corr = FALSE, title = "Classic - Complete", order = "hclust", - tl.pos ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, - col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_classic_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Classic - Mom Missing",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_partial_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial - Mom Missing",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_classic_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Classic - Dad Missing",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial - Dad Missing",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

To visualize the differences from the true matrix:

corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Classic Mom Diff from Complete",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_mom),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial Mom Diff from Complete",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_classic_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Classic Dad Diff from Complete",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)


 corrplot(as.matrix(fam1$ped_add_classic_complete - fam1$ped_add_partial_dad),
   method = "color", type = "lower", col.lim = c(0, 1),
   is.corr = FALSE, title = "Partial Dad Diff from Complete",
   order = "hclust",
-  tl.pos    ="l", tl.col = "black", tl.srt = 5, tl.cex = 0.2,
-  col = COL1('Reds', 100), mar=c(0,0,2,0))
+ tl.pos = "l", tl.col = "black", tl.srt = 5, tl.cex = 0.2, + col = COL1("Reds", 100), mar = c(0, 0, 2, 0) +)

These plots show how each method responds to missing data, and whether it maintains consistency with the complete pedigree. We observe From 13ac783f590b51014b39dec856f7b1a976e8ed50 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 16 Apr 2025 20:19:47 -0400 Subject: [PATCH 35/35] oops --- R/helpPedigree.R | 4 ++-- man/determineSex.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/helpPedigree.R b/R/helpPedigree.R index e46d49cf..87263fb9 100644 --- a/R/helpPedigree.R +++ b/R/helpPedigree.R @@ -39,8 +39,8 @@ createGenDataFrame <- function(sizeGens, genIndex, idGen) { #' #' @param idGen Vector of IDs for the generation. #' @param sexR Numeric value indicating the sex ratio (proportion of males). -#' @param recode_male The value to use for males. Default is "M" -#' @param recode_female The value to use for females. Default is "F" +#' @param code_male The value to use for males. Default is "M" +#' @param code_female The value to use for females. Default is "F" #' @return Vector of sexes ("M" for male, "F" for female) for the offspring. #' @importFrom stats runif determineSex <- function(idGen, sexR, code_male = "M", code_female = "F") { diff --git a/man/determineSex.Rd b/man/determineSex.Rd index 4ea9498a..39711ada 100644 --- a/man/determineSex.Rd +++ b/man/determineSex.Rd @@ -11,9 +11,9 @@ determineSex(idGen, sexR, code_male = "M", code_female = "F") \item{sexR}{Numeric value indicating the sex ratio (proportion of males).} -\item{recode_male}{The value to use for males. Default is "M"} +\item{code_male}{The value to use for males. Default is "M"} -\item{recode_female}{The value to use for females. Default is "F"} +\item{code_female}{The value to use for females. Default is "F"} } \value{ Vector of sexes ("M" for male, "F" for female) for the offspring.