diff --git a/DESCRIPTION b/DESCRIPTION index 338d5d2..10bb71c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Type: Package -Package: RankedSetSampling +Package: InPlotSampling Title: Easing the Application of Ranked Set Sampling in Practice Version: 0.1.0 Date: 2021-02-09 @@ -11,15 +11,15 @@ Authors@R: c( comment = c(ORCID = "0000-0001-5291-3600")), person("Peter", "Kasprzak", , "peter.kasprzak@adelaide.edu.au", role = "aut") ) -Description: The RankedSetSampling package provides a way for researchers +Description: The InPlotSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. Ranked Set Sampling was originally described by McIntyre (1952) (reprinted in 2005) . This package takes work by Omer and Kravchuk (2021) and enables easy use of the methods. License: MIT + file LICENSE -URL: https://biometryhub.github.io/RankedSetSampling/ -BugReports: https://github.com/biometryhub/RankedSetSampling/issues +URL: https://biometryhub.github.io/InPlotSampling/ +BugReports: https://github.com/biometryhub/InPlotSampling/issues Depends: R (>= 3.5.0) Imports: @@ -36,4 +36,4 @@ LinkingTo: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index eabfa89..21bbe8b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,11 @@ export(rss_jps_estimate) export(rss_sample) export(sbs_pps_estimate) export(sbs_pps_sample) +export(two_stage_cluster_sample) importFrom(Rcpp,sourceCpp) importFrom(stats,aggregate) importFrom(stats,qt) importFrom(stats,rnorm) importFrom(stats,sd) importFrom(stats,var) -useDynLib(RankedSetSampling, .registration = TRUE) +useDynLib(InPlotSampling, .registration = TRUE) diff --git a/R/CoefF.R b/R/CoefF.R index 5dd64f1..cdc145d 100644 --- a/R/CoefF.R +++ b/R/CoefF.R @@ -1,6 +1,6 @@ #' This function computes the coefficient of variance estimator #' -#' @param H Set size for each raking group. +#' @param H Set size for each ranking group. #' @param n Sample size. #' #' @return diff --git a/R/RankedSetSampling-package.R b/R/InPlotSampling-package.R similarity index 75% rename from R/RankedSetSampling-package.R rename to R/InPlotSampling-package.R index a34d809..0e1d3e2 100644 --- a/R/RankedSetSampling-package.R +++ b/R/InPlotSampling-package.R @@ -1,11 +1,11 @@ -#' @name RankedSetSampling +#' @name InPlotSampling "_PACKAGE" # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start #' @importFrom Rcpp sourceCpp -#' @useDynLib RankedSetSampling, .registration = TRUE +#' @useDynLib InPlotSampling, .registration = TRUE ## usethis namespace: end NULL diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..e3f00b9 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,8 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' @keywords internal +pascal <- function(m, popsize, set) { + .Call(`_InPlotSampling_pascal`, m, popsize, set) +} + diff --git a/R/jps_estimate.R b/R/jps_estimate.R index d0efb83..4de0b26 100644 --- a/R/jps_estimate.R +++ b/R/jps_estimate.R @@ -1,7 +1,7 @@ #' Computes the estimator for JPS data #' #' @param data The data to use for estimation. -#' @param set_size Set size for each raking group. +#' @param set_size Set size for each ranking group. #' @param replace Logical (default `TRUE`). Sample with replacement? #' @param model_based An inference mode: #' - `FALSE`: design based inference diff --git a/R/jps_estimate_single.R b/R/jps_estimate_single.R index b3e6b9c..194e834 100644 --- a/R/jps_estimate_single.R +++ b/R/jps_estimate_single.R @@ -2,7 +2,7 @@ #' #' @param ranks Ranks of Y. #' @param y Response measurements. -#' @param set_size Set size for each raking group. +#' @param set_size Set size for each ranking group. #' @param N Finite population size. #' @param coef Coefficients used in variance computation when sample size is n. #' @param coef_del Coefficients used in variance computation when the i-th unit is deleted. diff --git a/R/jps_sample.R b/R/jps_sample.R index 97fd3e0..4306677 100644 --- a/R/jps_sample.R +++ b/R/jps_sample.R @@ -1,7 +1,12 @@ #' Generate JPS sampling on the provided population. #' -#' @inheritParams rss_sample +#' @param pop Population that will be sampled. +#' @param n Sample size. +#' @param H Set size for each ranking group. +#' @param K Number of rankers. #' @param tau A parameter which controls ranking quality. +#' @param replace A boolean which specifies whether to sample with replacement or not. +#' @param with_index A boolean which specifies whether to return the index of the sampled population. #' #' @return A matrix with ranks from each ranker. #' @export @@ -36,10 +41,11 @@ #' #> [9,] 8.701285 2 1 2 #' #> [10,] 13.323884 3 3 3 #' -jps_sample <- function(pop, n, H, tau, K, replace = FALSE) { - verify_jps_params(pop, n, H, tau, K, replace) +jps_sample <- function(pop, n, H, tau, K, replace = FALSE, with_index = FALSE) { + verify_jps_params(pop, n, H, tau, K, replace, with_index) - sampling_matrix <- matrix(sample(pop, n * H, replace = replace), ncol = H, nrow = n) + sampling_indices <- sample(seq_along(pop), n * H, replace = replace) + sampling_matrix <- matrix(pop[sampling_indices], ncol = H, nrow = n) # rank each SRS unit post experimentally jps_matrix <- matrix(0, ncol = K + 1, nrow = n) @@ -55,6 +61,10 @@ jps_sample <- function(pop, n, H, tau, K, replace = FALSE) { } colnames(jps_matrix) <- c("Y", paste0("R", 1:K)) + if (with_index) { + jps_matrix <- cbind(sampling_indices[1:n], jps_matrix) + colnames(jps_matrix)[1] <- "i" + } + return(jps_matrix) } -#' @export diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R index 68326c8..755858d 100644 --- a/R/rss_jps_estimate.R +++ b/R/rss_jps_estimate.R @@ -36,7 +36,7 @@ #' taus <- sigma * sqrt(1 / rhos^2 - 1) #' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) #' -#' data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) +#' data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) #' data <- data[order(data[, 2]), ] #' #' rss_jps_estimate( @@ -75,7 +75,7 @@ #' x <- population + tau * rnorm(population_size, 0, 1) #' #' population <- cbind(population, x) -#' data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement) +#' data <- InPlotSampling::rss_sample(population, n, H, n_rankers, with_replacement) #' data <- data[order(data[, 2]), ] #' #' rss_estimates <- rss_jps_estimate( diff --git a/R/rss_sample.R b/R/rss_sample.R index 4407b15..9bb89dd 100644 --- a/R/rss_sample.R +++ b/R/rss_sample.R @@ -50,7 +50,7 @@ rss_sample <- function(pop, n, H, K, replace = FALSE) { #' #' @param pop Population that will be sampled with an auxiliary parameter in the second column. #' @param n Sample size. -#' @param H Set size for each raking group. +#' @param H Set size for each ranking group. #' @param K Number of rankers. #' #' @return A matrix with ranks from each ranker. diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R index 41416ff..fb9bafa 100644 --- a/R/sbs_pps_sample.R +++ b/R/sbs_pps_sample.R @@ -82,6 +82,7 @@ sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) { #' - sbs_indices: sbs sample indices #' - pps_indices: pps sample indices #' - sizes_wo_sbs: measured sizes without sbs sample +#' @keywords internal #' get_sbs_pps_sample_indices <- function(population, n, with_unique_pps = FALSE) { n_population <- dim(population)[1] diff --git a/R/two_stage_cluster_sample.R b/R/two_stage_cluster_sample.R new file mode 100644 index 0000000..ce16847 --- /dev/null +++ b/R/two_stage_cluster_sample.R @@ -0,0 +1,117 @@ +#' Generate two-stage cluster sampling on the population provided. +#' +#' @param pop Population that will be sampled with these ordered columns: +#' 1. Parent id: an index to denotes the parent of the record +#' 2. Parent auxiliary parameter: an auxiliary parameter for ranking parents +#' 3. Child auxiliary parameter: an auxiliary parameter for ranking children +#' @param sampling_strategies (first stage sampling strategy, second stage sampling strategy), e.g., +#' `c('srs', 'jps')`. +#' - `'srs'`: simple random sampling without replacement +#' - `'jps'`: JPS sampling +#' @param n Number of samples in the first stage. +#' @param H Set size for each ranking group in the first stage. +#' @param replace A boolean which specifies whether to sample with replacement or not in the first stage +#' (applicable only for JPS sampling). +#' @param ni Number(s) of samples in the second stage. Can be a single number or a vector of `n` numbers. +#' @param Hi Set size for each ranking group in the second stage. Can be a single number or a vector of `n` +#' numbers. +#' @param replace_i A boolean which specifies whether to sample with replacement or not in the second stage +#' (applicable only for JPS sampling). +#' +#' @return A matrix with ranks from each ranker. +#' @export +#' +#' @examples +#' set.seed(112) +#' parent_size <- 300 +#' child_size <- 50 +#' # the number of samples to be ranked in each set +#' H <- 3 +#' +#' sampling_strategies <- c("jps", "jps") +#' replace <- FALSE +#' mu <- 10 +#' sigma <- 4 +#' n <- 4 +#' +#' parent_indices <- rep(1:parent_size, child_size) +#' parent_aux <- abs(qnorm(1:parent_size / (parent_size + 1), mu, sigma) + 5 * rnorm(parent_size, 0, 1)) +#' child_aux <- abs(parent_aux + 10 * rnorm(parent_size * child_size, 0, 1)) +#' +#' population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux) +#' two_stage_cluster_sample(population, sampling_strategies, n, H, replace, 6, 3, FALSE) +#' #> parent_id parent_rank child_id child_aux child_rank +#' #> [1,] 201 1 7101 2.2349453 1 +#' #> [2,] 201 1 12801 9.7175545 3 +#' #> [3,] 201 1 6501 7.9207230 1 +#' #> [4,] 201 1 9801 5.7644835 2 +#' #> [5,] 201 1 10701 13.8089335 3 +#' #> [6,] 201 1 3501 0.3598331 1 +#' #> [7,] 254 2 8654 17.3059292 3 +#' #> [8,] 254 2 11354 15.0837335 2 +#' #> [9,] 254 2 9254 6.0103919 2 +#' #> [10,] 254 2 2954 12.7011502 2 +#' #> [11,] 254 2 14954 5.1158133 2 +#' #> [12,] 254 2 13754 5.8931551 1 +#' #> [13,] 74 1 8474 4.3393349 1 +#' #> [14,] 74 1 9674 15.0512523 2 +#' #> [15,] 74 1 6674 12.9022479 3 +#' #> [16,] 74 1 674 2.9209174 2 +#' #> [17,] 74 1 7274 7.2500468 3 +#' #> [18,] 74 1 6374 7.0925954 1 +#' #> [19,] 223 3 9223 28.4694257 3 +#' #> [20,] 223 3 223 4.4001977 1 +#' #> [21,] 223 3 9823 22.8676415 3 +#' #> [22,] 223 3 11923 26.4531048 3 +#' #> [23,] 223 3 823 20.8714211 2 +#' #> [24,] 223 3 9523 8.1783058 1 +#' +two_stage_cluster_sample <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) { + verify_two_stage_params(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) + + pop <- cbind(pop, seq_len(dim(pop)[1])) + parent <- unique(pop[, c(1, 2)]) + first_strategy <- sampling_strategies[1] + second_strategy <- sampling_strategies[2] + + # first stage + if (first_strategy == "srs") { + first_stage_indices <- sample(seq_len(dim(parent)[1]), n) + first_stage_sample <- cbind(parent[first_stage_indices, ], 0) + } else if (first_strategy == "jps") { + first_stage_sample <- jps_sample(parent[, 2], n, H, 0, 1, replace, TRUE) + + first_stage_indices <- first_stage_sample[, 1] + first_stage_sample[, 1] <- parent[first_stage_indices, 1] + } + first_stage_sample <- first_stage_sample[, c(1, 3)] + + if (length(ni) == 1) { + ni <- rep(ni, n) + } + if (length(Hi) == 1) { + Hi <- rep(Hi, n) + } + + sampling_matrix <- matrix(nrow = 0, ncol = 5) + for (i in 1:n) { + parent_filter <- pop[, 1] == first_stage_sample[i, 1] + children <- pop[parent_filter, c(4, 3)] + + if (second_strategy == "srs") { + second_stage_indices <- sample(seq_len(dim(children)[1]), ni[i]) + second_stage_sample <- cbind(children[second_stage_indices, ], 0) + } else if (second_strategy == "jps") { + second_stage_sample <- jps_sample(children[, 2], ni[i], Hi[i], 0, 1, replace, TRUE) + + second_stage_indices <- second_stage_sample[, 1] + second_stage_sample[, 1] <- children[second_stage_indices, 1] + } + + children_sample <- cbind(first_stage_sample[i, 1], first_stage_sample[i, 2], second_stage_sample) + sampling_matrix <- rbind(sampling_matrix, children_sample) + } + + colnames(sampling_matrix) <- c("parent_id", "parent_rank", "child_id", colnames(pop)[3], "child_rank") + return(sampling_matrix) +} diff --git a/R/utils.R b/R/utils.R index 3fe2078..8b5d632 100644 --- a/R/utils.R +++ b/R/utils.R @@ -68,6 +68,15 @@ is_whole_number <- function(x, tol = default_tolerance) { return(abs(x - round(x)) < tol) } +is_positive_whole_numbers <- function(x, tol = default_tolerance) { + for (i in x) { + if (!is_positive_whole_number(i, tol)) { + return(FALSE) + } + } + return(TRUE) +} + must_be <- function(x, valid_values) { return(must_be_(valid_values)(x)) } @@ -139,9 +148,9 @@ verify_rss_wo_replace_params <- function(pop, n, H, K) { } } -verify_jps_params <- function(pop, n, H, tau, K, with_replacement) { +verify_jps_params <- function(pop, n, H, tau, K, replace, with_index) { verify_positive_whole_number(n, H, K) - verify_boolean(with_replacement) + verify_boolean(replace, with_index) if (n < H) { stop("`n` must >= `H`.") @@ -153,7 +162,44 @@ verify_jps_params <- function(pop, n, H, tau, K, with_replacement) { } n_population <- length(pop) - if (!with_replacement) { + if (!replace) { + if (n_population < n * H) { + stop("The number of population must be at least `nH`.") + } + } +} + +verify_two_stage_params <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) { + verify_positive_whole_number(n, H) + verify_boolean(replace, replace_i) + verify_positive_whole_numbers(ni, Hi) + + if (length(ni) != 1 && length(ni) != n) { + stop("`ni` must be a vector of 1 or `n` values.") + } + + if (length(Hi) != 1 && length(Hi) != n) { + stop("`Hi` must be a vector of 1 or `n` values.") + } + + if (length(sampling_strategies) != 2) { + stop("`sampling_strategies` must be a vector of 2 values.") + } + + if (!all(sampling_strategies %in% c("srs", "jps"))) { + stop("`sampling_strategies` must be a vector of `'srs'` and/or `'jps'`.") + } + + if (n < H) { + stop("`n` must be at least `H`.") + } + + if (!all(ni >= Hi)) { + stop("ith value of `ni` must be at least ith value of `Hi`.") + } + + n_population <- dim(pop)[[1]] + if (!replace && sampling_strategies[1] == "jps") { if (n_population < n * H) { stop("The number of population must be at least `nH`.") } @@ -224,6 +270,10 @@ verify_positive_whole_number <- function(..., var_names = NULL) { verify_data_type(is_positive_whole_number, "a positive whole number", var_names, ...) } +verify_positive_whole_numbers <- function(..., var_names = NULL) { + verify_data_type(is_positive_whole_numbers, "a vector of positive whole numbers", var_names, ...) +} + verify_must_be <- function(..., valid_values, var_names = NULL) { literal_values <- get_literal_values(valid_values) verify_data_type(must_be_(valid_values), literal_values, var_names, ...) diff --git a/README.Rmd b/README.Rmd index d0d398a..986248d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( ) ``` -# RankedSetSampling +# InPlotSampling ```{r, echo = FALSE} description <- read.dcf("DESCRIPTION") @@ -28,8 +28,8 @@ min.r <- substr(description[, "Depends"], 7, 11) [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](http://www.repostatus.org/badges/latest/wip.svg)](http://www.repostatus.org/#wip) [![Codecov test coverage](https://codecov.io/gh/biometryhub/RankedSetSampling/branch/main/graph/badge.svg)](https://codecov.io/gh/biometryhub/RankedSetSampling?branch=main) -[![R build status](https://github.com/biometryhub/RankedSetSampling/workflows/R-CMD-check/badge.svg)](https://github.com/biometryhub/RankedSetSampling/actions) -![pkgdown](https://github.com/biometryhub/RankedSetSampling/workflows/pkgdown/badge.svg) +[![R build status](https://github.com/biometryhub/InPlotSampling/workflows/R-CMD-check/badge.svg)](https://github.com/biometryhub/InPlotSampling/actions) +![pkgdown](https://github.com/biometryhub/InPlotSampling/workflows/pkgdown/badge.svg)
[![minimal R version](https://img.shields.io/badge/R%3E%3D-`r min.r`-6666ff.svg)](https://cran.r-project.org/) [![packageversion](https://img.shields.io/badge/Package%20version-`r gsub('-', '--', version)`-orange.svg?style=flat-square)](/commits/main) @@ -38,7 +38,7 @@ min.r <- substr(description[, "Depends"], 7, 11) -The RankedSetSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. +The InPlotSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. ## Table of Contents @@ -76,7 +76,7 @@ Use the following code to install this package: ```{r eval=F} if (!require("remotes")) install.packages("remotes") -remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) +remotes::install_github("biometryhub/InPlotSampling", upgrade = FALSE) ``` ## Examples @@ -103,10 +103,10 @@ remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) taus <- sigma * sqrt(1 / rhos^2 - 1) population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) - data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) + data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) data <- data[order(data[, 2]), ] - RankedSetSampling::rss_jps_estimate( + InPlotSampling::rss_jps_estimate( data, set_size = H, method = "JPS", @@ -163,10 +163,10 @@ remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) # Citing this package -This package can be cited using `citation("RankedSetSampling")` which generates +This package can be cited using `citation("InPlotSampling")` which generates ```{r echo=F, comment = NA} -citation("RankedSetSampling") +citation("InPlotSampling") ``` # Related Reference diff --git a/README.md b/README.md index 16cd59f..b82dc9b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# RankedSetSampling +# InPlotSampling @@ -11,17 +11,17 @@ public.](http://www.repostatus.org/badges/latest/wip.svg)](http://www.repostatus [![Codecov test coverage](https://codecov.io/gh/biometryhub/RankedSetSampling/branch/main/graph/badge.svg)](https://codecov.io/gh/biometryhub/RankedSetSampling?branch=main) [![R build -status](https://github.com/biometryhub/RankedSetSampling/workflows/R-CMD-check/badge.svg)](https://github.com/biometryhub/RankedSetSampling/actions) -![pkgdown](https://github.com/biometryhub/RankedSetSampling/workflows/pkgdown/badge.svg) +status](https://github.com/biometryhub/InPlotSampling/workflows/R-CMD-check/badge.svg)](https://github.com/biometryhub/InPlotSampling/actions) +![pkgdown](https://github.com/biometryhub/InPlotSampling/workflows/pkgdown/badge.svg)
[![minimal R version](https://img.shields.io/badge/R%3E%3D-3.5.0-6666ff.svg)](https://cran.r-project.org/) [![packageversion](https://img.shields.io/badge/Package%20version-0.1.0-orange.svg?style=flat-square)](/commits/main) -[![Last-changedate](https://img.shields.io/badge/last%20change-2024--05--29-yellowgreen.svg)](/commits/main) +[![Last-changedate](https://img.shields.io/badge/last%20change-2025--02--11-yellowgreen.svg)](/commits/main) [![Licence](https://img.shields.io/github/license/mashape/apistatus.svg)](http://choosealicense.com/licenses/mit/) -The RankedSetSampling package provides a way for researchers to easily +The InPlotSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. ## Table of Contents @@ -68,7 +68,7 @@ Use the following code to install this package: ``` r if (!require("remotes")) install.packages("remotes") -remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) +remotes::install_github("biometryhub/InPlotSampling", upgrade = FALSE) ``` ## Examples @@ -76,7 +76,9 @@ remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) ### JPS Sample and Estimator
+ + JPS sample and estimator @@ -97,10 +99,10 @@ JPS sample and estimator taus <- sigma * sqrt(1 / rhos^2 - 1) population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) - data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) + data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) data <- data[order(data[, 2]), ] - RankedSetSampling::rss_jps_estimate( + InPlotSampling::rss_jps_estimate( data, set_size = H, method = "JPS", @@ -123,7 +125,9 @@ JPS sample and estimator ### SBS PPS Sample and Estimator
+ + SBS PPS sample and estimator @@ -161,24 +165,23 @@ SBS PPS sample and estimator # Citing this package -This package can be cited using `citation("RankedSetSampling")` which +This package can be cited using `citation("InPlotSampling")` which generates - To cite package 'RankedSetSampling' in publications use: + To cite package 'InPlotSampling' in publications use: - Ozturk O, Rogers S, Kravchuk O, Kasprzak P (2021). - _RankedSetSampling: Easing the Application of Ranked Set Sampling in - Practice_. R package version 0.1.0, - . + Ozturk O, Rogers S, Kravchuk O, Kasprzak P (2021). _InPlotSampling: + Easing the Application of Ranked Set Sampling in Practice_. R package + version 0.1.0, . A BibTeX entry for LaTeX users is @Manual{, - title = {RankedSetSampling: Easing the Application of Ranked Set Sampling in Practice}, + title = {InPlotSampling: Easing the Application of Ranked Set Sampling in Practice}, author = {Omer Ozturk and Sam Rogers and Olena Kravchuk and Peter Kasprzak}, year = {2021}, note = {R package version 0.1.0}, - url = {https://biometryhub.github.io/RankedSetSampling/}, + url = {https://biometryhub.github.io/InPlotSampling/}, } # Related Reference diff --git a/_pkgdown.yml b/_pkgdown.yml index 540a7eb..e7901a5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: https://biometryhub.github.io/RankedSetSampling/ +url: https://biometryhub.github.io/InPlotSampling/ destination: docs diff --git a/man/InPlotSampling.Rd b/man/InPlotSampling.Rd new file mode 100644 index 0000000..30ad58b --- /dev/null +++ b/man/InPlotSampling.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/InPlotSampling-package.R +\docType{package} +\name{InPlotSampling} +\alias{InPlotSampling-package} +\alias{InPlotSampling} +\title{InPlotSampling: Easing the Application of Ranked Set Sampling in Practice} +\description{ +The InPlotSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. Ranked Set Sampling was originally described by McIntyre (1952) (reprinted in 2005) \doi{10.1198/000313005X54180}. This package takes work by Omer and Kravchuk (2021) \url{https://doi.org/10.1007/s13253-021-00439-1} and enables easy use of the methods. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://biometryhub.github.io/InPlotSampling/} + \item Report bugs at \url{https://github.com/biometryhub/InPlotSampling/issues} +} + +} +\author{ +\strong{Maintainer}: Sam Rogers \email{sam.rogers@adelaide.edu.au} + +Authors: +\itemize{ + \item Omer Ozturk \email{omer@stat.osu.edu} (\href{https://orcid.org/0000-0002-0235-4772}{ORCID}) + \item Olena Kravchuk \email{olena.kravchuk@adelaide.edu.au} (\href{https://orcid.org/0000-0001-5291-3600}{ORCID}) [data contributor] + \item Peter Kasprzak \email{peter.kasprzak@adelaide.edu.au} +} + +} diff --git a/man/RankedSetSampling.Rd b/man/RankedSetSampling.Rd deleted file mode 100644 index 92a50ee..0000000 --- a/man/RankedSetSampling.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RankedSetSampling-package.R -\docType{package} -\name{RankedSetSampling} -\alias{RankedSetSampling-package} -\alias{RankedSetSampling} -\title{RankedSetSampling: Easing the Application of Ranked Set Sampling in Practice} -\description{ -The RankedSetSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. Ranked Set Sampling was originally described by McIntyre (1952) (reprinted in 2005) \doi{10.1198/000313005X54180}. This package takes work by Omer and Kravchuk (2021) \url{https://doi.org/10.1007/s13253-021-00439-1} and enables easy use of the methods. -} -\seealso{ -Useful links: -\itemize{ - \item \url{https://biometryhub.github.io/RankedSetSampling/} - \item Report bugs at \url{https://github.com/biometryhub/RankedSetSampling/issues} -} - -} -\author{ -\strong{Maintainer}: Sam Rogers \email{sam.rogers@adelaide.edu.au} - -Authors: -\itemize{ - \item Omer Ozturk \email{omer@stat.osu.edu} (\href{https://orcid.org/0000-0002-0235-4772}{ORCID}) - \item Olena Kravchuk \email{olena.kravchuk@adelaide.edu.au} (\href{https://orcid.org/0000-0001-5291-3600}{ORCID}) [data contributor] - \item Peter Kasprzak \email{peter.kasprzak@adelaide.edu.au} -} - -} diff --git a/man/calculate_coefficients.Rd b/man/calculate_coefficients.Rd index 9048e6c..c1b5b8c 100644 --- a/man/calculate_coefficients.Rd +++ b/man/calculate_coefficients.Rd @@ -7,7 +7,7 @@ calculate_coefficients(H, n) } \arguments{ -\item{H}{Set size for each raking group.} +\item{H}{Set size for each ranking group.} \item{n}{Sample size.} } diff --git a/man/get_sbs_pps_sample_indices.Rd b/man/get_sbs_pps_sample_indices.Rd index 1999595..5993ec5 100644 --- a/man/get_sbs_pps_sample_indices.Rd +++ b/man/get_sbs_pps_sample_indices.Rd @@ -29,3 +29,4 @@ A named list of: \description{ Generate SBS PPS sample indices. } +\keyword{internal} diff --git a/man/jps_estimate.Rd b/man/jps_estimate.Rd index 8311128..5d7234c 100644 --- a/man/jps_estimate.Rd +++ b/man/jps_estimate.Rd @@ -9,7 +9,7 @@ jps_estimate(data, set_size, replace = TRUE, model_based, N, alpha) \arguments{ \item{data}{The data to use for estimation.} -\item{set_size}{Set size for each raking group.} +\item{set_size}{Set size for each ranking group.} \item{replace}{Logical (default \code{TRUE}). Sample with replacement?} diff --git a/man/jps_estimate_single.Rd b/man/jps_estimate_single.Rd index f3dc971..26fc4a4 100644 --- a/man/jps_estimate_single.Rd +++ b/man/jps_estimate_single.Rd @@ -21,7 +21,7 @@ jps_estimate_single( \item{y}{Response measurements.} -\item{set_size}{Set size for each raking group.} +\item{set_size}{Set size for each ranking group.} \item{N}{Finite population size.} diff --git a/man/jps_sample.Rd b/man/jps_sample.Rd index f78ebba..a326b40 100644 --- a/man/jps_sample.Rd +++ b/man/jps_sample.Rd @@ -4,20 +4,22 @@ \alias{jps_sample} \title{Generate JPS sampling on the provided population.} \usage{ -jps_sample(pop, n, H, tau, K, replace = FALSE) +jps_sample(pop, n, H, tau, K, replace = FALSE, with_index = FALSE) } \arguments{ -\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} +\item{pop}{Population that will be sampled.} \item{n}{Sample size.} -\item{H}{Set size for each raking group.} +\item{H}{Set size for each ranking group.} \item{tau}{A parameter which controls ranking quality.} \item{K}{Number of rankers.} \item{replace}{A boolean which specifies whether to sample with replacement or not.} + +\item{with_index}{A boolean which specifies whether to return the index of the sampled population.} } \value{ A matrix with ranks from each ranker. diff --git a/man/rss_jps_estimate.Rd b/man/rss_jps_estimate.Rd index bc4c0df..a208099 100644 --- a/man/rss_jps_estimate.Rd +++ b/man/rss_jps_estimate.Rd @@ -66,7 +66,7 @@ rhos <- rep(0.75, n_rankers) taus <- sigma * sqrt(1 / rhos^2 - 1) population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) -data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) +data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) data <- data[order(data[, 2]), ] rss_jps_estimate( @@ -105,7 +105,7 @@ tau <- sigma * sqrt(1 / rho^2 - 1) x <- population + tau * rnorm(population_size, 0, 1) population <- cbind(population, x) -data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement) +data <- InPlotSampling::rss_sample(population, n, H, n_rankers, with_replacement) data <- data[order(data[, 2]), ] rss_estimates <- rss_jps_estimate( diff --git a/man/rss_sample.Rd b/man/rss_sample.Rd index 2c2f23a..95f027e 100644 --- a/man/rss_sample.Rd +++ b/man/rss_sample.Rd @@ -11,7 +11,7 @@ rss_sample(pop, n, H, K, replace = FALSE) \item{n}{Sample size.} -\item{H}{Set size for each raking group.} +\item{H}{Set size for each ranking group.} \item{K}{Number of rankers.} diff --git a/man/rss_sample_w_replacement.Rd b/man/rss_sample_w_replacement.Rd index a2d3a7f..1092b9b 100644 --- a/man/rss_sample_w_replacement.Rd +++ b/man/rss_sample_w_replacement.Rd @@ -11,7 +11,7 @@ rss_sample_w_replacement(pop, n, H, K) \item{n}{Sample size.} -\item{H}{Set size for each raking group.} +\item{H}{Set size for each ranking group.} \item{K}{Number of rankers.} } diff --git a/man/rss_sample_wo_replacement.Rd b/man/rss_sample_wo_replacement.Rd index a01d458..96adfdc 100644 --- a/man/rss_sample_wo_replacement.Rd +++ b/man/rss_sample_wo_replacement.Rd @@ -11,7 +11,7 @@ rss_sample_wo_replacement(pop, n, H, K) \item{n}{Sample size.} -\item{H}{Set size for each raking group.} +\item{H}{Set size for each ranking group.} \item{K}{Number of rankers.} } diff --git a/man/two_stage_cluster_sample.Rd b/man/two_stage_cluster_sample.Rd new file mode 100644 index 0000000..fe15098 --- /dev/null +++ b/man/two_stage_cluster_sample.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/two_stage_cluster_sample.R +\name{two_stage_cluster_sample} +\alias{two_stage_cluster_sample} +\title{Generate two-stage cluster sampling on the population provided.} +\usage{ +two_stage_cluster_sample( + pop, + sampling_strategies, + n, + H, + replace, + ni, + Hi, + replace_i +) +} +\arguments{ +\item{pop}{Population that will be sampled with these ordered columns: +\enumerate{ +\item Parent id: an index to denotes the parent of the record +\item Parent auxiliary parameter: an auxiliary parameter for ranking parents +\item Child auxiliary parameter: an auxiliary parameter for ranking children +}} + +\item{sampling_strategies}{(first stage sampling strategy, second stage sampling strategy), e.g., +\code{c('srs', 'jps')}. +\itemize{ +\item \code{'srs'}: simple random sampling without replacement +\item \code{'jps'}: JPS sampling +}} + +\item{n}{Number of samples in the first stage.} + +\item{H}{Set size for each ranking group in the first stage.} + +\item{replace}{A boolean which specifies whether to sample with replacement or not in the first stage +(applicable only for JPS sampling).} + +\item{ni}{Number(s) of samples in the second stage. Can be a single number or a vector of \code{n} numbers.} + +\item{Hi}{Set size for each ranking group in the second stage. Can be a single number or a vector of \code{n} +numbers.} + +\item{replace_i}{A boolean which specifies whether to sample with replacement or not in the second stage +(applicable only for JPS sampling).} +} +\value{ +A matrix with ranks from each ranker. +} +\description{ +Generate two-stage cluster sampling on the population provided. +} +\examples{ +set.seed(112) +parent_size <- 300 +child_size <- 50 +# the number of samples to be ranked in each set +H <- 3 + +sampling_strategies <- c("jps", "jps") +replace <- FALSE +mu <- 10 +sigma <- 4 +n <- 4 + +parent_indices <- rep(1:parent_size, child_size) +parent_aux <- abs(qnorm(1:parent_size / (parent_size + 1), mu, sigma) + 5 * rnorm(parent_size, 0, 1)) +child_aux <- abs(parent_aux + 10 * rnorm(parent_size * child_size, 0, 1)) + +population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux) +two_stage_cluster_sample(population, sampling_strategies, n, H, replace, 6, 3, FALSE) +#> parent_id parent_rank child_id child_aux child_rank +#> [1,] 201 1 7101 2.2349453 1 +#> [2,] 201 1 12801 9.7175545 3 +#> [3,] 201 1 6501 7.9207230 1 +#> [4,] 201 1 9801 5.7644835 2 +#> [5,] 201 1 10701 13.8089335 3 +#> [6,] 201 1 3501 0.3598331 1 +#> [7,] 254 2 8654 17.3059292 3 +#> [8,] 254 2 11354 15.0837335 2 +#> [9,] 254 2 9254 6.0103919 2 +#> [10,] 254 2 2954 12.7011502 2 +#> [11,] 254 2 14954 5.1158133 2 +#> [12,] 254 2 13754 5.8931551 1 +#> [13,] 74 1 8474 4.3393349 1 +#> [14,] 74 1 9674 15.0512523 2 +#> [15,] 74 1 6674 12.9022479 3 +#> [16,] 74 1 674 2.9209174 2 +#> [17,] 74 1 7274 7.2500468 3 +#> [18,] 74 1 6374 7.0925954 1 +#> [19,] 223 3 9223 28.4694257 3 +#> [20,] 223 3 223 4.4001977 1 +#> [21,] 223 3 9823 22.8676415 3 +#> [22,] 223 3 11923 26.4531048 3 +#> [23,] 223 3 823 20.8714211 2 +#> [24,] 223 3 9523 8.1783058 1 + +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 07e1545..7d3d11f 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,7 +12,7 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); // pascal Rcpp::NumericMatrix pascal(Rcpp::NumericMatrix m, int popsize, int set); -RcppExport SEXP _RankedSetSampling_pascal(SEXP mSEXP, SEXP popsizeSEXP, SEXP setSEXP) { +RcppExport SEXP _InPlotSampling_pascal(SEXP mSEXP, SEXP popsizeSEXP, SEXP setSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -25,11 +25,11 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_RankedSetSampling_pascal", (DL_FUNC) &_RankedSetSampling_pascal, 3}, + {"_InPlotSampling_pascal", (DL_FUNC) &_InPlotSampling_pascal, 3}, {NULL, NULL, 0} }; -RcppExport void R_init_RankedSetSampling(DllInfo *dll) { +RcppExport void R_init_InPlotSampling(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/tests/testthat.R b/tests/testthat.R index ee96e74..c6bfa8c 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) -library(RankedSetSampling) +library(InPlotSampling) -test_check("RankedSetSampling") +test_check("InPlotSampling") diff --git a/tests/testthat/test-jps_sample.R b/tests/testthat/test-jps_sample.R index dab78f1..67e43a9 100644 --- a/tests/testthat/test-jps_sample.R +++ b/tests/testthat/test-jps_sample.R @@ -20,5 +20,6 @@ test_that("Inputs are valid.", { expect_error(jps_sample(population, 30, 2, tau, 4), "The length of `tau` must equal to `K`.") expect_error(jps_sample(population, 50, 4, tau, k), "The number of population must be at least `nH`.") expect_error(jps_sample(population, 10, 20, tau, k), "`n` must >= `H`.") - expect_error(jps_sample(population, 30, 2, tau, k, "T"), "`with_replacement` must be a boolean.") + expect_error(jps_sample(population, 30, 2, tau, k, "T"), "`replace` must be a boolean.") + expect_error(jps_sample(population, 30, 2, tau, k, TRUE, "T"), "`with_index` must be a boolean.") }) diff --git a/tests/testthat/test-two_stage_cluster_sample.R b/tests/testthat/test-two_stage_cluster_sample.R new file mode 100644 index 0000000..dba8a6f --- /dev/null +++ b/tests/testthat/test-two_stage_cluster_sample.R @@ -0,0 +1,109 @@ +test_that("Two-stage cluster sample has a correct output.", { + skip_if(getRversion() < "3.4") + + parent_size <- 20 + child_size <- 20 + + parent_indices <- rep(1:parent_size, child_size) + parent_aux <- 1:parent_size + child_aux <- 1:(parent_size * child_size) + population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux) + + jps_jps_matrix <- two_stage_cluster_sample(population, c("jps", "jps"), 4, 3, FALSE, 6, 3, FALSE) + expect_equal(dim(jps_jps_matrix), c(24, 5)) + expect_true(all(jps_jps_matrix[, 2] %in% 1:3)) + expect_true(all(jps_jps_matrix[, 5] %in% 1:3)) + + srs_jps_matrix <- two_stage_cluster_sample(population, c("srs", "jps"), 4, 3, FALSE, 6, 3, FALSE) + expect_equal(dim(srs_jps_matrix), c(24, 5)) + expect_equal(unique(srs_jps_matrix[, 2]), 0) + + jps_srs_matrix <- two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 6, 3, FALSE) + expect_equal(dim(jps_srs_matrix), c(24, 5)) + expect_equal(unique(jps_srs_matrix[, 5]), 0) + + jps_srs_matrix <- two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, c(4, 3, 3, 3), 3, FALSE) + expect_equal(dim(jps_srs_matrix), c(13, 5)) + + jps_srs_matrix <- two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 6, c(4, 3, 3, 3), FALSE) + expect_equal(dim(jps_srs_matrix), c(24, 5)) +}) + +test_that("Inputs are valid.", { + parent_size <- 20 + child_size <- 20 + + parent_indices <- rep(1:parent_size, child_size) + parent_aux <- 1:parent_size + child_aux <- 1:(parent_size * child_size) + population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux) + + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 0, 3, FALSE, 6, 3, FALSE), + "`n` must be a positive whole number." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 0, FALSE, 6, 3, FALSE), + "`H` must be a positive whole number." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, "FALSE", 6, 3, FALSE), + "`replace` must be a boolean." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, 0, 6, 3, FALSE), + "`replace` must be a boolean." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 6, 3, "FALSE"), + "`replace_i` must be a boolean." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 0, 3, FALSE), + "`ni` must be a vector of positive whole numbers." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 6, 0, FALSE), + "`Hi` must be a vector of positive whole numbers." + ) + expect_error( + two_stage_cluster_sample(population, "jp", 4, 3, FALSE, 6, 3, FALSE), + "`sampling_strategies` must be a vector of 2 values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs", "srs"), 4, 3, FALSE, 6, 3, FALSE), + "`sampling_strategies` must be a vector of 2 values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "sr"), 4, 3, FALSE, 6, 3, FALSE), + "`sampling_strategies` must be a vector of `'srs'` and/or `'jps'`." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 2, 3, FALSE, 6, 3, FALSE), + "`n` must be at least `H`." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, rep(4, 3), 3, FALSE), + "`ni` must be a vector of 1 or `n` values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, rep(4, 5), 3, FALSE), + "`ni` must be a vector of 1 or `n` values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 4, rep(3, 5), FALSE), + "`Hi` must be a vector of 1 or `n` values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 4, rep(3, 3), FALSE), + "`Hi` must be a vector of 1 or `n` values." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 4, 3, FALSE, 2, 3, FALSE), + "ith value of `ni` must be at least ith value of `Hi`." + ) + expect_error( + two_stage_cluster_sample(population, c("jps", "srs"), 8, 3, FALSE, 6, 3, FALSE), + "The number of population must be at least `nH`." + ) +})