#' get_prices_gcameurope
#'
#' Extract and format prices from a GCAM-Europe database or project file for MEDUSA.
#'
#' The function reads the "prices by sector" and "costs by subsector" queries
#' for the EU-27 regions, keeps the sectors listed in
#' `medusa::mapping_gcam_medusa`, averages them per COICOP code for
#' `selected_year`, and divides every scenario by `base_scen` to obtain a
#' relative price change (1 = no change). Crop price changes are taken from
#' Austria and replicated to every EU country. Delivered biomass is dropped.
#'
#' @keywords GCAM GCAM-Europe prices
#' @return Invisibly, a data frame with a `coicop` column and one column per
#' country-scenario pair (`<eurostat code>_<scenario>`) holding the relative
#' price change. All codes in `medusa::all_coicop` are present; codes without a
#' GCAM-Europe price are filled with 1.
#' @param db_path Path to the GCAM-Europe database. If `db_path` or `db_name`
#' is `NULL`, the project file `prj_name` is loaded instead.
#' @param query_path Path to the folder containing the query file.
#' @param db_name Name of the GCAM-Europe database.
#' @param prj_name Name of the rgcam project. This can be an existing project
#' file, or, when a database is given, the name of the project that is created
#' and saved under `output/`.
#' @param scenarios Vector names of the GCAM-Europe scenarios to be processed
#' (only used when the project is created from a database).
#' @param queries Name of the GCAM-Europe query file. The default is the
#' MEDUSA query file (`queries_GCAM_MEDUSA.xml`).
#' @param final_db_year Final year in the GCAM-Europe database. Currently not
#' used by this function.
#' @param saveOutput If `TRUE` (default), writes the price shocks of the
#' COICOP codes covered by GCAM-Europe to
#' `output/GCAMEurope_shocks_<prj_name>.csv`.
#' @param base_scen The base scenario that other scenarios will be compared against.
#' @param selected_year The year of analysis, when the scenarios will be compared.
#' @importFrom magrittr %>%
#' @export
get_prices_gcameurope <- function(db_path = NULL, query_path = "inst/extdata", db_name = NULL, prj_name,
                                  scenarios, queries = "queries_GCAM_MEDUSA.xml", final_db_year = 2100,
                                  saveOutput = TRUE, base_scen, selected_year) {

  # Set countries
  EU_COUNTRIES <- c("Austria", "Belgium", "Bulgaria", "Croatia",
                    "Cyprus", "Czech Republic", "Denmark",
                    "Estonia", "Finland", "France", "Germany",
                    "Greece", "Hungary", "Ireland", "Italy",
                    "Latvia", "Lithuania", "Luxembourg", "Malta",
                    "Netherlands", "Poland", "Portugal", "Romania",
                    "Slovakia", "Slovenia", "Spain", "Sweden")

  # Region whose crop price changes are extended to every EU country
  crop_prod_COUNTRY <- "Austria"

  # Set crop coicop codes (for adjustment)
  crop_coicop_codes <- c("CP01111",
                         "CP01112",
                         "CP01154",
                         "CP0116",
                         "CP01163",
                         "CP0117",
                         "CP01176",
                         "CP01181")

  # Set delivered biomass code (not working well)
  deliv_bio_coicop <- "CP04549"

  # Folder where the project file and the shocks file are written
  output_dir <- "output"

  # Load price mapping files
  map_price <- medusa::mapping_gcam_medusa %>%
    dplyr::filter(query == "prices by sector") %>%
    dplyr::select(sector = gcam) %>%
    dplyr::distinct() %>%
    dplyr::pull()

  map_cost <- medusa::mapping_gcam_medusa %>%
    dplyr::filter(query == "costs by subsector") %>%
    dplyr::select(sector = gcam) %>%
    dplyr::distinct() %>%
    dplyr::pull()

  # Load or create the rgcam project:
  # scalar condition, so use the short-circuit operator
  if (!is.null(db_path) && !is.null(db_name)) {
    rlang::inform("Creating project ...")
    conn <- rgcam::localDBConn(db_path, db_name, migabble = FALSE)

    prj <- rgcam::addScenario(conn = conn,
                              proj = prj_name,
                              scenario = scenarios,
                              queryFile = file.path(query_path, queries),
                              clobber = FALSE,
                              saveProj = FALSE)

    if (!dir.exists(output_dir)) dir.create(output_dir)
    rgcam::saveProject(prj, file = file.path(output_dir, prj_name))

  } else {
    rlang::inform("Loading project ...")
    prj <- rgcam::loadProject(prj_name)
  }

  rlang::inform("Extracting prices ...")

  prices <- rgcam::getQuery(prj, "prices by sector") %>%
    dplyr::filter(region %in% EU_COUNTRIES,
                  sector %in% map_price)

  # Costs are reported by subsector; relabel them so they share the
  # `sector` key used by the price query and the COICOP mapping
  costs <- rgcam::getQuery(prj, "costs by subsector") %>%
    dplyr::filter(region %in% EU_COUNTRIES,
                  subsector %in% map_cost) %>%
    dplyr::select(-sector) %>%
    dplyr::rename(sector = subsector)

  data <- dplyr::bind_rows(prices, costs)

  # -----
  # Aggregate to COICOP categories
  coicop_map <- medusa::mapping_gcam_medusa %>%
    dplyr::select(-query) %>%
    dplyr::rename(sector = gcam)

  # One GCAM sector can feed several COICOP codes and vice versa, hence the
  # many-to-many join followed by an unweighted mean per COICOP code
  data_coicop <- data %>%
    dplyr::filter(year == selected_year) %>%
    dplyr::left_join(coicop_map, by = "sector", relationship = "many-to-many") %>%
    dplyr::group_by(scenario, region, coicop, year) %>%
    dplyr::summarise(value = mean(value), .groups = "drop")

  # Fail early with a clear message instead of a join error further down
  if (!base_scen %in% data_coicop$scenario) {
    rlang::abort(paste0("Base scenario '", base_scen,
                        "' has no price data for year ", selected_year, "."))
  }

  # -----
  # Compute price changes in relation to base_scen
  data_coicop_base <- data_coicop %>%
    dplyr::filter(scenario == base_scen) %>%
    dplyr::rename(value_base = value) %>%
    dplyr::select(-scenario)

  data_coicop_diff <- data_coicop %>%
    dplyr::filter(scenario != base_scen) %>%
    gcamdata::left_join_error_no_match(
      data_coicop_base,
      by = c("region", "coicop", "year")
    ) %>%
    # filter out negative prices from delivered biomass
    dplyr::filter(coicop != deliv_bio_coicop) %>%
    dplyr::mutate(price_diff = value / value_base)

  # With the new structure, we need to adjust price changes in crops:
  # the price change in the reference crop region is extended to all EU countries
  data_coicop_adjCrop <- data_coicop_diff %>%
    dplyr::filter(region == crop_prod_COUNTRY,
                  coicop %in% crop_coicop_codes) %>%
    dplyr::select(-region) %>%
    tibble::as_tibble() %>%
    gcamdata::repeat_add_columns(tibble::tibble(region = EU_COUNTRIES))

  # Long table of shocks, labelled "<eurostat code>_<scenario>".
  # NaN ratios (0 / 0) are treated as "no change".
  shocks_long <- data_coicop_diff %>%
    dplyr::filter(!coicop %in% crop_coicop_codes) %>%
    dplyr::bind_rows(data_coicop_adjCrop) %>%
    dplyr::mutate(
      eurostat_code = countrycode::countrycode(region,
                                               origin = "country.name",
                                               destination = "eurostat"),
      ctry_sce = paste0(eurostat_code, "_", scenario)
    ) %>%
    dplyr::select(ctry_sce, coicop, price_diff) %>%
    dplyr::mutate(price_diff = dplyr::if_else(is.nan(price_diff), 1, price_diff))

  # Create the final dataset (only COICOP codes covered by GCAM-Europe):
  data_coicop_fin <- shocks_long %>%
    dplyr::distinct() %>%
    tidyr::pivot_wider(
      names_from = ctry_sce,
      values_from = price_diff
    )

  # -------
  # Add all the remaining coicop categories (with no change, value = 1)
  data_coicop_fin_full <- shocks_long %>%
    tidyr::complete(tidyr::nesting(ctry_sce), coicop = medusa::all_coicop) %>%
    tidyr::replace_na(list(price_diff = 1)) %>%
    tidyr::pivot_wider(
      names_from = ctry_sce,
      values_from = price_diff
    )

  # NOTE(review): no COICOP-names column is attached to the returned table;
  # confirm whether the downstream impact functions need one.

  # -------
  # Save and return
  if (isTRUE(saveOutput)) {
    if (!dir.exists(output_dir)) dir.create(output_dir)

    # Use only the file name of the project and strip a literal ".dat"
    # extension, so that project paths and names containing "dat" are safe
    prj_label <- sub("\\.dat$", "", basename(prj_name))
    file_name <- paste0("GCAMEurope_shocks_", prj_label, ".csv")

    utils::write.csv(data_coicop_fin, file = file.path(output_dir, file_name))
    rlang::inform(paste0("The GCAM-Europe prices file has been saved in ",
                         file.path(getwd(), output_dir, file_name)))
  }

  return(invisible(data_coicop_fin_full))

}
heat,CP0455/0 +prices by sector,refined liquids enduse,CP07221 +prices by sector,refined liquids enduse,CP07222 +prices by sector,refined liquids enduse,CP04530 +prices by sector,elect_td_trn,CP07223 +costs by subsector,HSR,CP07311 +costs by subsector,Passenger Rail,CP07311 +costs by subsector,Passenger Rail,CP07312 +costs by subsector,Bus,CP07321 +costs by subsector,Domestic Aviation,CP07331 +prices by sector,trn_aviation_intl,CP07332 +prices by sector,N fertilizer,CP09331 +prices by sector,paper,CP09541 diff --git a/inst/extdata/queries_GCAM_MEDUSA.xml b/inst/extdata/queries_GCAM_MEDUSA.xml new file mode 100644 index 0000000..1bdc421 --- /dev/null +++ b/inst/extdata/queries_GCAM_MEDUSA.xml @@ -0,0 +1,20 @@ + + + + + + sector + cost + *[@type = 'sector']/cost/text() + + + + + + subsector + cost + *[@type='sector' and (local-name()!='AgSupplySector')]/*[@type = 'subsector']/cost/text() + Excludes AgSupplySubsector costs, where data written out are no meaningful + + + diff --git a/inst/extdata/saveDataFiles.R b/inst/extdata/saveDataFiles.R index 7eeb953..34c27b7 100644 --- a/inst/extdata/saveDataFiles.R +++ b/inst/extdata/saveDataFiles.R @@ -162,3 +162,101 @@ use_data(is_categories, overwrite=T) # graph_labels graph_labels = read.csv(paste0(rawDataFolder,"graph_labels.csv"),header=T) use_data(graph_labels, overwrite=T) + +# mapping GCAM - MEDUSA COICOP categories +mapping_gcam_medusa <- read.csv(paste0(rawDataFolder, "mapping_gcam_medusa.csv"), header = T) +use_data(mapping_gcam_medusa, overwrite=T) + +# coicop codes list +all_coicop <- c( + "CP01", "CP011", "CP0111", "CP01111", "CP01112", "CP01113", "CP01114", "CP01115", "CP01116", "CP01117", "CP01118", + "CP0112", "CP01121", "CP01122", "CP01123", "CP01124", "CP01125", "CP01126", "CP01127", "CP01128", + "CP0113", "CP01131", "CP01132", "CP01133", "CP01134", "CP01135", "CP01136", + "CP0114", "CP01141", "CP01142", "CP01143", "CP01144", "CP01145", "CP01146", "CP01147", + "CP0115", "CP01151", "CP01152", "CP01153", 
"CP01154", "CP01155", + "CP0116", "CP01161", "CP01162", "CP01163", "CP01164", + "CP0117", "CP01171", "CP01172", "CP01173", "CP01174", "CP01175", "CP01176", + "CP0118", "CP01181", "CP01182", "CP01183", "CP01184", "CP01185", "CP01186", + "CP0119", "CP01191", "CP01192", "CP01193", "CP01194", "CP01199", + "CP012", "CP0121", "CP01211", "CP01212", "CP01213", + "CP0122", "CP01221", "CP01222", "CP01223", + "CP02", "CP021", "CP0211", "CP02111", "CP02112", + "CP0212", "CP02121", "CP02122", "CP02123", "CP02124", + "CP0213", "CP02131", "CP02132", "CP02133", "CP02134", + "CP022", "CP0220", "CP02201", "CP02202", "CP02203", + "CP023", + "CP03", "CP031", "CP0311", "CP03110", "CP0312", "CP03121", "CP03122", "CP03123", + "CP0313", "CP03131", "CP03132", + "CP0314", "CP03141", "CP03142", + "CP032", "CP0321", "CP03211", "CP03212", "CP03213", + "CP0322", "CP03220", + "CP04", "CP041", "CP0411", "CP04110", "CP0412", "CP04121", "CP04122", + "CP042", "CP0421", "CP04210", "CP0422", "CP04220", + "CP043", "CP0431", "CP04310", "CP0432", "CP04321", "CP04322", "CP04323", "CP04324", "CP04325", "CP04329", + "CP044", "CP0441", "CP04410", "CP0442", "CP04420", "CP0443", "CP04430", "CP0444", "CP04441", "CP04442", "CP04449", + "CP045", "CP0451", "CP04510", "CP0452", "CP04521", "CP04522", "CP0453", "CP04530", "CP0454", "CP04541", "CP04549", + "CP0455", "CP04550", + "CP05", "CP051", "CP0511", "CP05111", "CP05112", "CP05113", "CP05119", + "CP0512", "CP05121", "CP05122", "CP05123", "CP0513", "CP05130", + "CP052", "CP0520", "CP05201", "CP05202", "CP05203", "CP05204", "CP05209", + "CP053", "CP0531", "CP05311", "CP05312", "CP05313", "CP05314", "CP05315", "CP05319", + "CP0532", "CP05321", "CP05322", "CP05323", "CP05324", "CP05329", + "CP0533", "CP05330", + "CP054", "CP0540", "CP05401", "CP05402", "CP05403", "CP05404", + "CP055", "CP0551", "CP05511", "CP05512", "CP0552", "CP05521", "CP05522", "CP05523", + "CP056", "CP0561", "CP05611", "CP05612", "CP0562", "CP05621", "CP05622", "CP05623", "CP05629", + "CP06", 
"CP061", "CP0611", "CP06110", "CP0612", "CP06121", "CP06129", + "CP0613", "CP06131", "CP06132", "CP06133", "CP06139", + "CP062", "CP0621", "CP06211", "CP06212", "CP0622", "CP06220", + "CP0623", "CP06231", "CP06232", "CP06239", + "CP063", "CP0630", "CP06300", + "CP07", "CP071", "CP0711", "CP07111", "CP07112", "CP0712", "CP07120", "CP0713", "CP07130", "CP0714", "CP07140", + "CP072", "CP0721", "CP07211", "CP07212", "CP07213", "CP0722", "CP07221", "CP07222", "CP07223", "CP07224", + "CP0723", "CP07230", "CP0724", "CP07241", "CP07242", "CP07243", + "CP073", "CP0731", "CP07311", "CP07312", "CP0732", "CP07321", "CP07322", + "CP0733", "CP07331", "CP07332", "CP0734", "CP07341", "CP07342", + "CP0735", "CP07350", "CP0736", "CP07361", "CP07362", "CP07369", + "CP08", "CP081", "CP0810", "CP08101", "CP08109", + "CP082", "CP0820", "CP08201", "CP08202", "CP08203", "CP08204", + "CP083", "CP0830", "CP08301", "CP08302", "CP08303", "CP08304", "CP08305", + "CP09", "CP091", "CP0911", "CP09111", "CP09112", "CP09113", "CP09119", + "CP0912", "CP09121", "CP09122", "CP09123", + "CP0913", "CP09131", "CP09132", "CP09133", "CP09134", + "CP0914", "CP09141", "CP09142", "CP09149", "CP0915", "CP09150", + "CP092", "CP0921", "CP09211", "CP09212", "CP09213", "CP09214", "CP09215", + "CP0922", "CP09221", "CP09222", + "CP0923", "CP09230", + "CP093", "CP0931", "CP09311", "CP09312", "CP0932", "CP09321", "CP09322", "CP09323", + "CP0933", "CP09331", "CP09332", + "CP0934", "CP09341", "CP09342", + "CP0935", "CP09350", + "CP094", "CP0941", "CP09411", "CP09412", "CP0942", "CP09421", "CP09422", "CP09423", "CP09424", "CP09425", "CP09429", + "CP0943", + "CP095", "CP0951", "CP09511", "CP09512", "CP09513", "CP09514", + "CP0952", "CP09521", "CP09522", + "CP0953", "CP09530", + "CP0954", "CP09541", "CP09549", + "CP096", "CP0960", "CP09601", "CP09602", + "CP10", "CP101", "CP1010", "CP10101", "CP10102", + "CP102", "CP1020", "CP10200", + "CP103", "CP1030", "CP10300", + "CP104", "CP1040", "CP10400", + "CP105", "CP1050", 
"CP10500", + "CP11", "CP111", "CP1111", "CP11111", "CP11112", + "CP1112", "CP11120", + "CP112", "CP1120", "CP11201", "CP11202", "CP11203", + "CP12", "CP121", "CP1211", "CP12111", "CP12112", "CP12113", + "CP1212", "CP12121", "CP12122", + "CP1213", "CP12131", "CP12132", + "CP122", + "CP123", "CP1231", "CP12311", "CP12312", "CP12313", + "CP1232", "CP12321", "CP12322", "CP12323", "CP12329", + "CP124", "CP1240", "CP12401", "CP12402", "CP12403", "CP12404", + "CP125", "CP1252", "CP12520", "CP1253", "CP12531", "CP12532", + "CP1254", "CP12541", "CP12542", + "CP1255", "CP12550", + "CP126", "CP1262", "CP12621", "CP12622", + "CP127", "CP1270", "CP12701", "CP12702", "CP12703", "CP12704" +) +use_data(all_coicop, overwrite=T) + diff --git a/man/all_coicop.Rd b/man/all_coicop.Rd new file mode 100644 index 0000000..c125c31 --- /dev/null +++ b/man/all_coicop.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{all_coicop} +\alias{all_coicop} +\title{List of all COICOP codes available in MEDUSA} +\format{ +.csv +} +\source{ +medusa +} +\usage{ +all_coicop +} +\description{ +List of all COICOP codes available in MEDUSA +} +\keyword{datasets} diff --git a/man/get_prices_gcam.Rd b/man/get_prices_gcam.Rd new file mode 100644 index 0000000..1123011 --- /dev/null +++ b/man/get_prices_gcam.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_gcam_prices.R +\name{get_prices_gcam} +\alias{get_prices_gcam} +\title{get_prices_gcam} +\usage{ +get_prices_gcam( + db_path = NULL, + query_path = "inst/extdata", + db_name = NULL, + prj_name, + prj = NULL, + scenarios, + queries = "queries_GCAM_MEDUSA.xml", + final_db_year = 2100, + saveOutput = T, + base_scen, + selected_year +) +} +\arguments{ +\item{db_path}{Path to the GCAM database} + +\item{query_path}{Path to the query file} + +\item{db_name}{Name of the GCAM database} + +\item{prj_name}{Name of the rgcam project. 
This can be an existing project, or, if not, this will be the name} + +\item{prj}{rgcam loaded project} + +\item{scenarios}{Vector names of the GCAM scenarios to be processed} + +\item{queries}{Name of the GCAM query file. The file by default includes the queries required to run rfasst} + +\item{final_db_year}{Final year in the GCAM database (this allows to process databases with user-defined "stop periods")} + +\item{saveOutput}{Writes the files.By default=T} + +\item{base_scen}{The base scenario that other scenarios will be compared against} + +\item{selected_year}{The year of analysis, when the scenarios wil be compared} +} +\value{ +a dataframe with prices by sector and GCAM-Europe region +} +\description{ +Extract and format prices from a GCAM-Europe database or project file for MEDUSA +} +\keyword{GCAM,} +\keyword{GCAM-Europe,} +\keyword{prices} diff --git a/man/mapping_gcam_medusa.Rd b/man/mapping_gcam_medusa.Rd new file mode 100644 index 0000000..a283a00 --- /dev/null +++ b/man/mapping_gcam_medusa.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{mapping_gcam_medusa} +\alias{mapping_gcam_medusa} +\title{Mapping between the GCAM items names and the COICOP codes} +\format{ +.csv +} +\source{ +medusa +} +\usage{ +mapping_gcam_medusa +} +\description{ +Mapping between the GCAM items names and the COICOP codes +} +\keyword{datasets} diff --git a/man/order_var.Rd b/man/order_var.Rd index fc031eb..287e76a 100644 --- a/man/order_var.Rd +++ b/man/order_var.Rd @@ -4,8 +4,6 @@ \alias{order_var} \title{order_var} \usage{ -order_var(data, g) - order_var(data, g) } \arguments{ @@ -14,12 +12,8 @@ order_var(data, g) \item{g}{variable for which we want to sort the labels} } \value{ -a dataset in which the labels are ordered for the selected socioeconomic or demographic variable - a dataset in which the labels are ordered for the selected socioeconomic or demographic variable } \description{ 
Function to order the labels of the socioeconomic and demographic variablesin basic graph - -Function to order the labels of the socioeconomic and demographic variables in intersectional graph } diff --git a/man/order_vars.Rd b/man/order_vars.Rd new file mode 100644 index 0000000..b0b7a77 --- /dev/null +++ b/man/order_vars.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{order_vars} +\alias{order_vars} +\title{order_vars} +\usage{ +order_vars(data, g) +} +\arguments{ +\item{data}{dataset in which we want to order the labels of the socioeconomic and demographic variables} + +\item{g}{variable for which we want to sort the labels} +} +\value{ +a dataset in which the labels are ordered for the selected socioeconomic or demographic variable +} +\description{ +Function to order the labels of the socioeconomic and demographic variables in intersectional graph +} diff --git a/vignettes/TutorialsExample.Rmd b/vignettes/TutorialsExample.Rmd index 5d2ea95..09b1348 100644 --- a/vignettes/TutorialsExample.Rmd +++ b/vignettes/TutorialsExample.Rmd @@ -131,4 +131,4 @@ The figure generated by `calc_di` shows the relative impact (%) on total equival - In **Scenario 1** (left panel), all income quintiles experience a welfare loss, with middle- and high-income households being more affected. The impact is similar across genders, though minor differences exist. - In **Scenario 2** (right panel), the policy intervention is **progressive**, as it **benefits lower-income households the most**, while the negative impact remains for higher-income groups. Women-headed households in the lowest quintiles appear to benefit slightly more from the intervention than men-headed households, reinforcing the redistributive nature of the policy. 
 DI_QUINTILE_GENDERRP diff --git a/vignettes/figs/ex_shocks22.png b/vignettes/figs/ex_shocks22.png new file mode 100644 index 0000000..feaaf85 Binary files /dev/null and b/vignettes/figs/ex_shocks22.png differ diff --git a/vignettes/vig_extra.Rmd b/vignettes/vig_extra.Rmd new file mode 100644 index 0000000..fbe01e6 --- /dev/null +++ b/vignettes/vig_extra.Rmd @@ -0,0 +1,185 @@ +--- +title: "Calculate distributional impacts (EU)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Calculate distributional impacts (EU)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + # Step by step tutorials + + Before running any of the examples below, make sure you have processed the Eurostat HBS microdata with `hbs_eu()`. See [Preparing the data](https://bc3lc.github.io/medusa/articles/TutorialsEU_data.html) for details. + +```r +hbs <- hbs_eu(year = 2015, country = "all", path = "raw_data") +``` + +## Example 1. How to enter price shocks in `calc_di_eu` + +1. Download the example file into your working directory by running the following function in the R terminal: + + ```r +ex_shocks_eu() +``` + +2. Go to your working directory and open the csv **"Example_shocks_eu.csv"**. + +3. Define the scenarios: each column named after a country code and a scenario suffix (e.g. `AT_s1`, `BE_s1`, ..., `AT_s2`, `BE_s2`, ...) represents a scenario for each country. To run `calc_di_eu` for a single scenario, delete all columns for Scenario 2. To add another scenario, copy the Scenario 2 columns to the right and change the suffix to `_s3`. You can rename scenarios by changing the suffix (e.g. `_shock1`), but keep the country code prefix intact. + +4. Enter the price shocks: each row corresponds to a COICOP code. Enter the price change to be applied to each COICOP category for each country and each scenario. A value greater than 1 indicates a price increase (e.g. 
`1.1` for a 10% increase) and less than 1 indicates a price decrease (e.g. `0.9` for a 10% decrease). If there is no shock in a category, keep `1`. + +5. Save the edited csv file. + +6. Upload the edited file to R: + + ```r +shocks <- read.csv("Example_shocks_eu.csv", + header = TRUE, + sep = ",", + dec = ".") +``` + +7. Run `calc_di_eu` with the processed HBS data and the price shocks: + + ```r +results <- calc_di_eu(data = hbs, # Output from hbs_eu() + shocks = shocks) # Edited shocks file +``` + +By default, `calc_di_eu` calculates distributional impacts for all available socioeconomic variables and saves the results and figures in an `outputs/` folder within your working directory. + +--- + + ## Example 2. How to select variables for distributional impacts + + ### Individual variables (`var`) + + 1. To see the variables available for distributional impact analysis, run: + + ```r +available_var_eu() +``` + +2. Select a variable (e.g. `"decile"`) and pass it to the `var` argument of `calc_di_eu`: + + ```r +results <- calc_di_eu(data = hbs, + shocks = shocks, + var = "decile") # Single variable +``` + +3. To calculate impacts for several variables, pass a vector: + + ```r +vars <- c("decile", "zone", "gender") + +results <- calc_di_eu(data = hbs, + shocks = shocks, + var = vars) +``` + +For more information on available variables, see [Available Variables](https://bc3lc.github.io/medusa/articles/AvailableVariables.html). + +### Intersectional variables (`var_intersec`) + +1. To see the combinations of variables available for intersectional distributional impacts, run: + + ```r +available_var_intersec_eu() +``` + +2. Download the intersectional variables file into your working directory: + + ```r +ex_var_intersec_eu() +``` + +3. Open **"Var_Intersec_eu.csv"** and delete the rows for combinations you do not want to analyse. Save the edited file. + +4. 
Upload the file to R: + + ```r +example_vars <- read.csv("Var_Intersec_eu.csv", + header = TRUE, + sep = ",", + dec = ".") +``` + +5. Pass the file to the `var_intersec` argument of `calc_di_eu`: + + ```r +results <- calc_di_eu(data = hbs, + shocks = shocks, + var_impact = NULL, # Skip individual variables + var_intersec = example_vars) # Intersectional combinations +``` + +--- + + ## Example 3. Country-level vs. EU-level results + + By default, `calc_di_eu` calculates distributional impacts both at the **EU level** (across all households jointly) and at the **country level** (separately for each member state). To disable country-level results and obtain only EU-level results, set `by_country = FALSE`: + + ```r +results <- calc_di_eu(data = hbs, + shocks = shocks, + by_country = FALSE) +``` + +--- + + ## Example 4. Update the microdata to a different year + + If your price shocks come from a macroeconomic model calibrated to a year different from the HBS wave, you can update the HBS expenditure data before running the simulation using the `update_hbs` argument: + + ```r +results <- calc_di_eu(data = hbs, # HBS 2015 microdata + update_hbs = 2018, # Update expenditure to 2018 prices + shocks = shocks) +``` + +--- + +## Example 5. Connect with GCAM-Europe + +1. Read the GCAM-Europe prices from the database. You can indicate the database path and name, or the already created project file. +**Importantly**, you must indicate the Baseline scenario from which the percentage prices increment will be computed. 
Indicate also the year of the analysis through the `selected_year` parameter: + +```r +# if indicating the database path and name +shocks <- get_prices_gcameurope(db_path = 'path/to/db', db_name = 'db_name', + prj_name = 'new_prj_name.dat', + scenarios = c('scen1','scen2'), + final_db_year = 2100, saveOutput = T, + base_scen = 'Reference', selected_year = 2015) + + +# if indicating the project name +shocks <- get_prices_gcameurope(prj_name = 'path/to/existing/prj.dat', + scenarios = c('scen1','scen2'), + final_db_year = 2100, saveOutput = T, + base_scen = 'Reference', selected_year = 2015) +``` + +2. Go to your working directory and check the csv **"output/GCAMEurope_shocks_[prj_name].csv"**. Each column is named after a country code and a scenario suffix (e.g. `AT_s1`, `BE_s1`, ..., `AT_s2`, `BE_s2`, ...) and represents a scenario for each country; each row is a COICOP code. The values indicate the price change relative to the baseline scenario (e.g. `1.1` for a 10% increase and `0.9` for a 10% decrease). A value of `1` means no price change. + +3. Run `calc_di_eu` with the processed HBS data and the price shocks: + + ```r +results <- calc_di_eu(data = hbs, # Output from hbs_eu() + shocks = shocks) # GCAM-Europe shocks file +``` + +By default, `calc_di_eu` calculates distributional impacts for all available socioeconomic variables and saves the results and figures in an `outputs/` folder within your working directory. + +--- + + ## Outputs + + `calc_di_eu` returns a list with two elements: + + - **`di`**: a data frame with the basic distributional impacts per selected variable and scenario. +- **`dii`** (if `var_intersec` is specified): a data frame with the intersectional distributional impacts. + +By default, results are saved to the `outputs/` folder and figures are generated automatically. To suppress saving, set `save = FALSE` and `fig = FALSE`.