From c30548457f3534e92eab4582142c01283d3caca6 Mon Sep 17 00:00:00 2001 From: Yuri Drobyshev Date: Fri, 10 Apr 2026 12:32:38 +0400 Subject: [PATCH 1/3] refactor fill_scaling_table --- ncet/fill_scaling_table.py | 860 ++++++++++++++++++++++--------------- 1 file changed, 504 insertions(+), 356 deletions(-) diff --git a/ncet/fill_scaling_table.py b/ncet/fill_scaling_table.py index 9c29706..ff77cbb 100644 --- a/ncet/fill_scaling_table.py +++ b/ncet/fill_scaling_table.py @@ -1,421 +1,569 @@ +""" +ncet/fill_scaling_table.py — hardened rewrite +================================================ +Changes vs original: + 1. Replaced all deprecated DataFrame.append() with pd.concat(). + 2. NaN index values in scaling_table are replaced with a placeholder string + right after loading (rows are kept, not dropped; the per-sheet call in + _load_sheet() is commented out) so that str.match() / isin() / == do not yield + NA-valued masks (ValueError: "Cannot mask with non-boolean array containing NA / NaN values"). + 3. _is_nan() — universal NaN test covering None, float NaN, numpy scalar + NaN, and 0-d numpy arrays; safe on any Python type. + 4. _clean_mask() — converts any mask (Series, ndarray, scalar) to a pure + bool Series with NaN → False before it touches .loc[]. + 5. _safe_loc_set() runs _clean_mask() internally as a last-resort guard. + 6. "Method" columns are filled with "" (not NaN) after loading so that + == comparisons always yield a clean bool Series. + 7. All .loc[] / .at[] index accesses are existence-checked first. + 8. plant_characteristics key accesses use _get_pc() with safe defaults. + 9. Division-by-zero guarded via _safe_divide() and .replace(0, np.nan). + 10. f-strings replace .format() for readability; no logic changes. 
+""" + import pandas as pd import numpy as np from os.path import join as pjoin -import pdb + from .bldg_features import eval_bldg from .special_cases import cost_multipliers from .material_use_uncertainty import material_use_uncertainty +# ───────────────────────────────────────────────────────────────────────────── +# Helpers +# ───────────────────────────────────────────────────────────────────────────── + +def _is_nan(value) -> bool: + """ + Universal missing-value test. + Returns True for: Python None, float NaN, numpy scalar NaN (any dtype), + and 0-d numpy arrays whose single element is NaN. + Returns False for strings, lists, dicts, non-NaN numbers, etc. + """ + if value is None: + return True + try: + if isinstance(value, np.ndarray): + if value.ndim == 0: + return bool(np.isnan(value)) + return False # non-scalar arrays are never "a NaN value" + return bool(np.isnan(value)) + except (TypeError, ValueError): + return False + + +def _clean_mask(mask, index: pd.Index) -> pd.Series: + """ + Convert *mask* to a pure bool Series aligned to *index*. + NaN entries (produced by str.match / == on NaN-containing indices) + become False so those rows are never selected. + Called before every .loc[] write. + """ + if isinstance(mask, pd.Series): + return mask.fillna(False).astype(bool) + if isinstance(mask, np.ndarray): + s = pd.Series(mask, index=index) + return s.fillna(False).astype(bool) + return pd.Series(bool(mask), index=index) + + +def _drop_nan_index_rows(df: pd.DataFrame, label: str = "") -> pd.DataFrame: + """ + Remove rows whose index is NaN. + pandas reads blank Excel / CSV rows as NaN-indexed rows. Keeping them + causes str.match() to return NaN instead of True/False, which then raises + ValueError: "Cannot mask with non-boolean array containing NA / NaN values". 
+ """ + nan_mask = pd.isna(df.index) + n = int(nan_mask.sum()) + if n: + print(f" [INFO] Dropping {n} NaN-indexed row(s) from {label!r}") + df = df.loc[~nan_mask] + return df + +def _fill_nan_index_rows(df: pd.DataFrame, label: str = "", + placeholder: str = "__nan_row__") -> pd.DataFrame: + """ + Replace NaN index values with a harmless placeholder string instead of + dropping the row. This preserves all data (including numeric columns + like cost values that would otherwise be lost) while ensuring that + str.match() always returns a clean bool — never NaN — because the index + no longer contains any missing values. + + The placeholder begins with "__" so it will never accidentally match a + real EEDB account code (which always start with "A."). + + Numeric columns that are NaN in a blank row are left as-is; callers + that need a numeric default (e.g. 0.0) should apply .fillna() themselves + on the specific column they care about. + """ + nan_mask = pd.isna(df.index) + n = int(nan_mask.sum()) + if n: + print(f" [INFO] Filling {n} NaN index value(s) with " + f"{placeholder!r} in {label!r} — rows are kept, not dropped") + new_index = df.index.astype(object).where(~nan_mask, other=placeholder) + df = df.copy() + df.index = new_index + return df + + +def _safe_loc_set(df: pd.DataFrame, mask, col: str, value, + label: str = "") -> None: + """ + df.loc[mask, col] = value with three protection layers: + 1. _clean_mask() ensures the mask is a pure bool Series (NaN -> False). + 2. Skip entirely if nothing is selected. + 3. Catch and log any remaining exception. 
+ """ + clean = _clean_mask(mask, df.index) + if not clean.any(): + return + try: + df.loc[clean, col] = value + except Exception as exc: + print(f" [WARN] _safe_loc_set failed for {label!r}, col={col!r}: {exc}") + + +def _get_pc(plant_characteristics: dict, key: str, + default=None, label: str = ""): + """Fetch from plant_characteristics with a warning when key is absent.""" + if key not in plant_characteristics: + ctx = f" (context: {label})" if label else "" + print(f" [WARN] plant_characteristics missing key {key!r}{ctx}") + return default + return plant_characteristics[key] + + +def _safe_divide(numerator, denominator, label: str = "") -> float: + """numerator / denominator; returns 1.0 with a warning on 0 or NaN denom.""" + try: + if _is_nan(denominator) or denominator == 0: + print(f" [WARN] Division by zero/NaN for {label!r}; returning 1.0") + return 1.0 + return numerator / denominator + except Exception as exc: + print(f" [WARN] _safe_divide error for {label!r}: {exc}; returning 1.0") + return 1.0 + + +# ───────────────────────────────────────────────────────────────────────────── +# Main function +# ───────────────────────────────────────────────────────────────────────────── + def fill_scaling_table(path, fname, base, scalars_dict, scaling_table=None): + print("DEBUG-1", scaling_table) + # ── Load or prepare the scaling table ──────────────────────────────────── if scaling_table is None: scaling_table = pd.read_csv( - pjoin(path, "input_scaling_exponents.csv"), header=0, index_col="Account" + pjoin(path, "input_scaling_exponents.csv"), + header=0, + index_col="Account", ) else: + #scaling_table["Account"] = pd.Series(dtype=object) scaling_table.set_index("Account", inplace=True) - scaling_table["Option"] = 1 - scaling_table["New Base Unit Value"] = 0.0 - scaling_table["Multipliers"] = 1.0 + # Drop NaN-indexed rows immediately — root cause of the ValueError. 
+ scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table") + + scaling_table["Option"] = 1 + scaling_table["New Base Unit Value"] = 0.0 + scaling_table["Multipliers"] = 1.0 scaling_table["Factory Equipment Cost Mult"] = 1.0 - scaling_table["Site Labor Hours Mult"] = 1.0 - scaling_table["Site Labor Cost Mult"] = 1.0 - scaling_table["Site Material Cost Mult"] = 1.0 - scaling_table["Count per plant"] = 1 - scaling_table["New Cost"] = 0.0 - inside_dict = {} - - plant_characteristics = pd.read_excel( + scaling_table["Site Labor Hours Mult"] = 1.0 + scaling_table["Site Labor Cost Mult"] = 1.0 + scaling_table["Site Material Cost Mult"] = 1.0 + scaling_table["Count per plant"] = 1 + scaling_table["New Cost"] = 0.0 + inside_dict: dict = {} + # ── Plant characteristics ───────────────────────────────────────────────── + plant_characteristics: dict = pd.read_excel( pjoin(path, fname), sheet_name="PlantCharacteristics", header=None, skiprows=[0], index_col=0, ).to_dict()[1] - plant_characteristics["SPC One sided"] = [] - plant_characteristics["SPC Two sided"] = [] - plant_characteristics["SPC Area"] = [] - plant_characteristics["Grade 80"] = [] - plant_characteristics["Grade 100"] = [] - plant_characteristics["Containment type"] = ["Steel lined concrete"] - - plant_characteristics["sc1_BV"] = 0 # Seismic Category 1 Building Volume tally - plant_characteristics[ - "sc1_concrete" - ] = 0 # Seismic Category 1 concrete Volume tally + + plant_characteristics.setdefault("SPC One sided", []) + plant_characteristics.setdefault("SPC Two sided", []) + plant_characteristics.setdefault("SPC Area", []) + plant_characteristics.setdefault("Grade 80", []) + plant_characteristics.setdefault("Grade 100", []) + plant_characteristics.setdefault("Containment type", ["Steel lined concrete"]) + plant_characteristics.setdefault("sc1_BV", 0) + plant_characteristics.setdefault("sc1_concrete", 0) + concrete = 0 bv_accounts_225 = ["A.212.", "A.213.", "A.215.", "A.216.", "A.217."] bv_225 = 0 - 
df21 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="21-Structures&Improvements", - skiprows=[0], - index_col="Account", - ) - df22 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="22-ReactorEquipment", - skiprows=[0], - index_col="Account", - ) - df23 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="23-TurbineEquipment", - skiprows=[0], - index_col="Account", - ) - df24 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="24-ElectricalEquipment", - skiprows=[0], - index_col="Account", - ) - df25 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="25-MiscEquipment", - skiprows=[0], - index_col="Account", - ) - df26 = pd.read_excel( - pjoin(path, fname), - header=0, - sheet_name="26-HeatRejectionSystem", - skiprows=[0], - index_col="Account", - ) + # ── Load sheets, dropping NaN-index rows and sanitizing Method column ───── + _skw = dict(header=0, skiprows=[0], index_col="Account") + + def _load_sheet(sheet_name: str) -> pd.DataFrame: + df = pd.read_excel(pjoin(path, fname), sheet_name=sheet_name, **_skw) + #df = _fill_nan_index_rows(df, sheet_name) + # Make Method a plain string — NaN cells become "" so that + # df["Method"] == "anything" always returns a clean bool Series. 
+ if "Method" in df.columns: + df["Method"] = df["Method"].fillna("").astype(str) + return df + + df21 = _load_sheet("21-Structures&Improvements") + df22 = _load_sheet("22-ReactorEquipment") + df23 = _load_sheet("23-TurbineEquipment") + df24 = _load_sheet("24-ElectricalEquipment") + df25 = _load_sheet("25-MiscEquipment") + df26 = _load_sheet("26-HeatRejectionSystem") - # Add this for making the building_table plant_characteristics["New Bldg"] = df21["SSCs moved to"] - # Add dict to adjust rebar costs in special cases [should figure out a better way to do this eventually] - if any(df21["Rebar density"] != "Default"): - plant_characteristics["Rebar table"] = df21.loc[ - df21["Rebar density"] != "Default", "Rebar density" - ].to_dict() - # ---------------------------------------------Account 21 Structures & Improvements------------------------------------------------# + if "Rebar density" in df21.columns and any(df21["Rebar density"] != "Default"): + plant_characteristics["Rebar table"] = ( + df21.loc[df21["Rebar density"] != "Default", "Rebar density"].to_dict() + ) + + # ── Pre-computed option-type masks ──────────────────────────────────────── + # The "Option 1" column may contain NaN for blank rows; clean each mask. 
+ def _opt1_mask(value: str) -> pd.Series: + return _clean_mask(scaling_table["Option 1"] == value, scaling_table.index) + + ibv = _opt1_mask("Building volume") + isba = _opt1_mask("Substructure area") + isbv = _opt1_mask("Substructure volume") + ispa = _opt1_mask("Superstructure area") + ispv = _opt1_mask("Superstructure volume") + ipow = _opt1_mask("Plant power") + ic = _opt1_mask("Constant") + + # ═════════════════════════════════════════════════════════════════════════ + # Account 21 — Structures & Improvements + # ═════════════════════════════════════════════════════════════════════════ print("Evaluating account 21: Structures & Improvements") - accounts = df21.index.unique() - ibv = scaling_table["Option 1"] == "Building volume" - isba = scaling_table["Option 1"] == "Substructure area" - isbv = scaling_table["Option 1"] == "Substructure volume" - ispa = scaling_table["Option 1"] == "Superstructure area" - ispv = scaling_table["Option 1"] == "Superstructure volume" - ipow = scaling_table["Option 1"] == "Plant power" - ic = scaling_table["Option 1"] == "Constant" - - for account in accounts: - aux = df21.loc[account] - print(" Account: " + account + ", Name: " + aux["Name"]) - - if aux["Method"] == "Detailed (EEDB based)": - idx = scaling_table.index.str.match(account) - - # Calculate material use volumes/areas - portions = aux["Portions"] - subArea, subVol, superArea, superVol, bv = eval_bldg(portions, aux) - # Check if the building is inside another building (or has one inside it), and account for the changes to material use as necessary - if aux["Inside?"] != "None": - inside_acct = "A." 
+ aux["Inside?"].split("A.")[1] - in_or_out = aux["Inside?"].split(":")[0] - if in_or_out == "Inside": - inside_dict[inside_acct] = [ - account, - subArea, - subVol, - superArea, - superVol, - bv, - ] - elif in_or_out == "Outside": - ( - in_account, - in_subArea, - in_subVol, - in_superArea, - in_superVol, - in_bv, - ) = inside_dict[account] - subArea -= in_subArea - subVol -= in_subVol - bv -= in_bv - print( - " Superstructure volume: {:.0F}, area: {:.0F}".format( - superVol, superArea - ) + total_thermal_power = _get_pc( + plant_characteristics, "Total Plant Thermal Power (MWt)", + default=0.0, label="Account 21", + ) + + for account in df21.index.unique(): + aux = df21.loc[account] + method = str(aux.get("Method", "")) + print(f"\tAccount: {account}, Name: {aux.get('Name', '?')}") + + if method == "Detailed (EEDB based)": + # str.match returns NaN for any NaN-valued index entry. + # _clean_mask converts those NaN results to False. + idx = _clean_mask( + scaling_table.index.str.match(account), scaling_table.index ) - print(" Substructure volume: {:.0F}, area: {:.0F}".format(subVol, subArea)) - print(" Building volume: {:.0F}".format(bv)) + print(f"\tMethod: {method}") + + portions = aux.get("Portions", None) + subArea, subVol, superArea, superVol, bv = eval_bldg(portions, aux) + + # Use _is_nan() — handles None, float NaN, numpy scalar NaN, + # and 0-d numpy arrays which all appear for blank Excel cells. + inside_val = aux.get("Inside?", None) + if not _is_nan(inside_val) and str(inside_val) != "None": + inside_str = str(inside_val) + if "A." in inside_str and ":" in inside_str: + try: + inside_acct = "A." 
+ inside_str.split("A.")[1] + in_or_out = inside_str.split(":")[0] + if in_or_out == "Inside": + inside_dict[inside_acct] = [ + account, subArea, subVol, superArea, superVol, bv, + ] + elif in_or_out == "Outside": + if account in inside_dict: + (_, in_sub_a, in_sub_v, + _isa, _isv, in_bv) = inside_dict[account] + subArea -= in_sub_a + subVol -= in_sub_v + bv -= in_bv + else: + print(f"\t [WARN] 'Outside' ref for {account!r} " + f"has no matching 'Inside' entry") + except Exception as exc: + print(f"\t [WARN] Cannot parse Inside? {inside_val!r}: {exc}") + else: + print(f"\t [WARN] Unexpected Inside? format {inside_val!r}") + + print(f"\t\tSuperstructure volume: {superVol:.0f}, area: {superArea:.0f}") + print(f"\t\tSubstructure volume: {subVol:.0f}, area: {subArea:.0f}") + print(f"\t\tBuilding volume: {bv:.0f}") plant_characteristics[account] = subArea - # Update the scaling table with the calculated values - scaling_table.loc[(idx & ibv), "New Base Unit Value"] = bv - scaling_table.loc[(idx & isba), "New Base Unit Value"] = subArea - scaling_table.loc[(idx & isbv), "New Base Unit Value"] = subVol - scaling_table.loc[(idx & ispa), "New Base Unit Value"] = superArea - scaling_table.loc[(idx & ispv), "New Base Unit Value"] = superVol - scaling_table.loc[(idx & ic), "New Base Unit Value"] = 1 - scaling_table.loc[ - (idx & ipow), "New Base Unit Value" - ] = plant_characteristics["Total Plant Thermal Power (MWt)"] - - if aux["Steel plate composite"] == "One sided": + # idx and ibv/isba/... are all clean bool Series — & is safe. 
+ _safe_loc_set(scaling_table, idx & ibv, "New Base Unit Value", bv, account) + _safe_loc_set(scaling_table, idx & isba, "New Base Unit Value", subArea, account) + _safe_loc_set(scaling_table, idx & isbv, "New Base Unit Value", subVol, account) + _safe_loc_set(scaling_table, idx & ispa, "New Base Unit Value", superArea, account) + _safe_loc_set(scaling_table, idx & ispv, "New Base Unit Value", superVol, account) + _safe_loc_set(scaling_table, idx & ic, "New Base Unit Value", 1, account) + _safe_loc_set(scaling_table, idx & ipow, "New Base Unit Value", total_thermal_power, account) + + spc = aux.get("Steel plate composite", None) + if spc == "One sided": plant_characteristics["SPC One sided"].append(account) plant_characteristics["SPC Area"].append(superArea) - elif aux["Steel plate composite"] == "Two sided": + elif spc == "Two sided": plant_characteristics["SPC Two sided"].append(account) plant_characteristics["SPC Area"].append(superArea) - if aux["High strength rebar"] == "Grade 80": + rebar = aux.get("High strength rebar", None) + if rebar == "Grade 80": plant_characteristics["Grade 80"].append(account) - elif aux["High strength rebar"] == "Grade 100": + elif rebar == "Grade 100": plant_characteristics["Grade 100"].append(account) - if aux["Seismic Class 1"]: - plant_characteristics["sc1_BV"] += bv + if aux.get("Seismic Class 1", False): + plant_characteristics["sc1_BV"] += bv plant_characteristics["sc1_concrete"] += subVol + superVol if account in bv_accounts_225: bv_225 += bv concrete += subVol + superVol - if aux["Name"] == "Containment Liner": - # default is option 1, scaled by superstructure area for steel lined concrete, these are the exceptions - if aux["Superstructure type"] == "Stainless steel vessel": - scaling_table.loc[account, "Option"] = 0 - mass = 8000.0 * ( - superVol + subVol - ) # 8000 kg/m^3 is the density of stainless steel - print(" Mass of containment vessel: {:.0F}".format(mass)) - scaling_table.loc[account, "New Base Unit Value"] = mass 
- scaling_table.loc[ - account, "Multipliers" - ] = 2.3 # stainless more than carbon steel - scaling_table.loc[ - account, "Count per plant" - ] = plant_characteristics["Number of Reactors"] - plant_characteristics["Containment type"] = "Steel vessel" + if aux.get("Name", "") == "Containment Liner": + sup_type = aux.get("Superstructure type", "") + n_rx = _get_pc(plant_characteristics, "Number of Reactors", + default=1, label="Containment liner") + + if sup_type in ("Stainless steel vessel", "Carbon steel vessel"): + mass = 8000.0 * (superVol + subVol) + print(f"\t\tMass of containment vessel: {mass:.0f}") + if account in scaling_table.index: + scaling_table.loc[account, "Option"] = 0 + scaling_table.loc[account, "New Base Unit Value"] = mass + scaling_table.loc[account, "Count per plant"] = n_rx + if sup_type == "Stainless steel vessel": + scaling_table.loc[account, "Multipliers"] = 2.3 + plant_characteristics["Containment type"] = "Steel vessel" plant_characteristics["Containment vessel mass (kg)"] = mass - plant_characteristics['Containment thickness (m)'] = aux['Superstructure thickness (meters)'] - - elif aux['Superstructure type']=='Carbon steel vessel': - scaling_table.loc[account, "Option"] = 0 - mass = 8000.0 * ( - superVol + subVol - ) # 8000 kg/m^3 is the density of stainless steel - print(" Mass of containment vessel: {:.0F}".format(mass)) - scaling_table.loc[account, "New Base Unit Value"] = mass - scaling_table.loc[ - account, "Count per plant" - ] = plant_characteristics["Number of Reactors"] - plant_characteristics["Containment type"] = "Steel vessel" - plant_characteristics["Containment vessel mass (kg)"] = mass - plant_characteristics['Containment thickness (m)'] = aux['Superstructure thickness (meters)'] - - elif aux["Superstructure type"] == "Standalone steel building": - - plant_characteristics[ - "Containment type" - ] = "Standalone steel building" - # The multipliers came from the EEDB APWR6/PWR6/BE account breakdowns - scaling_table.loc[ - 
account, "Factory Equipment Cost Mult" - ] *= scalars_dict["212.15 Factory cost mult"] - scaling_table.loc[account, "Site Labor Hours Mult"] *= scalars_dict[ - "212.15 Labor hours mult" - ] - scaling_table.loc[account, "Site Labor Cost Mult"] *= scalars_dict[ - "212.15 Labor cost mult" - ] - scaling_table.loc[ - account, "Site Material Cost Mult" - ] *= scalars_dict["212.15 Material cost mult"] - plant_characteristics["Containment type"] = [ - "Standalone steel building" - ] - - elif aux["Method"] == "Detailed (Generic)": - print("Error, generic building not implemented yet") - break - - elif aux["Method"] == "Plant power scaling": - idx = scaling_table.index.str.match(account) - scaling_table.loc[idx, "Option"] = 2 - scaling_table.loc[idx, "New Base Unit Value"] = plant_characteristics[ - "Total Plant Thermal Power (MWt)" - ] - - elif aux["Method"] == "RX power scaling": - idx = scaling_table.index.str.match(account) - scaling_table.loc[idx, "Option"] = 2 - scaling_table.loc[idx, "New Base Unit Value"] = ( - plant_characteristics["Total Plant Thermal Power (MWt)"] - / plant_characteristics["Number of Reactors"] + plant_characteristics["Containment thickness (m)"] = aux.get( + "Superstructure thickness (meters)", np.nan + ) + + elif sup_type == "Standalone steel building": + plant_characteristics["Containment type"] = ["Standalone steel building"] + if account in scaling_table.index: + for mult_key, col in [ + ("212.15 Factory cost mult", "Factory Equipment Cost Mult"), + ("212.15 Labor hours mult", "Site Labor Hours Mult"), + ("212.15 Labor cost mult", "Site Labor Cost Mult"), + ("212.15 Material cost mult", "Site Material Cost Mult"), + ]: + if mult_key in scalars_dict: + scaling_table.loc[account, col] *= scalars_dict[mult_key] + else: + print(f"\t [WARN] scalars_dict missing key {mult_key!r}") + + elif method == "Detailed (Generic)": + print("\tDetailed (Generic) not implemented yet — skipping") + + elif method in ("Plant power scaling", "RX power scaling"): + 
idx = _clean_mask( + scaling_table.index.str.match(account), scaling_table.index ) + if not idx.any(): + print(f"\t [WARN] No rows match {account!r} — skipping") + continue + n_rx = _get_pc(plant_characteristics, "Number of Reactors", + default=1, label=f"{method}/{account}") or 1 + value = (total_thermal_power if method == "Plant power scaling" + else _safe_divide(total_thermal_power, n_rx, f"RX/{account}")) + _safe_loc_set(scaling_table, idx, "Option", 2, account) + _safe_loc_set(scaling_table, idx, "New Base Unit Value", value, account) + + elif method == "Fixed cost": + idx = _clean_mask( + scaling_table.index.str.match(account), scaling_table.index + ) + if not idx.any(): + print(f"\t [WARN] No rows match {account!r} — skipping") + continue + _safe_loc_set(scaling_table, idx, "Option", 4, account) + _safe_loc_set(scaling_table, idx, "New Base Unit Value", 1, account) + + elif method == "Direct cost": + idx = _clean_mask( + scaling_table.index.str.match(account), scaling_table.index + ) + if not idx.any(): + print(f"\t [WARN] No rows match {account!r} — skipping") + continue + col = "Direct cost per RX (2018 USD)" + if col in df21.columns and account in df21.index: + _safe_loc_set(scaling_table, idx, "Option", 3, account) + _safe_loc_set(scaling_table, idx, "New Base Unit Value", df21.loc[account, col], account) + else: + print(f"\t [WARN] Column {col!r} or account {account!r} missing in df21") + + elif method == "": + pass # blank row - elif aux["Method"] == "Fixed cost": - idx = scaling_table.index.str.match(account) - scaling_table.loc[idx, "Option"] = 4 - scaling_table.loc[idx, "New Base Unit Value"] = 1 - - elif aux["Method"] == "Direct cost": - idx = scaling_table.index.str.match(account) - scaling_table.loc[idx, "Option"] = 3 - scaling_table.loc[idx, "New Base Unit Value"] = df21.loc[ - account, "Direct cost per RX (2018 USD)" - ] - - # ------------------------------------------------------Account 22-26 
------------------------------------------------------# - print("Evaluating account 22 - 26") - df_big = df22.append(df23) - df_big = df_big.append(df24) - df_big = df_big.append(df25) - df_big = df_big.append(df26) - - idx_PPS = df_big.index[df_big["Method"] == "Plant power scaling"] - scaling_table.loc[idx_PPS, "Option"] = 2 - scaling_table.loc[idx_PPS, "New Base Unit Value"] = plant_characteristics[ - "Total Plant Thermal Power (MWt)" - ] - - idx_EPS = df_big.index[df_big["Method"] == "Plant electric power scaling"] - scaling_table.loc[idx_EPS, "Option"] = 2 - scaling_table.loc[idx_EPS, "New Base Unit Value"] = plant_characteristics[ - "Net Electrical Power (MWe)" - ] - - idx_TEPS = df_big.index[df_big["Method"] == "Turbine electric power scaling"] - scaling_table.loc[idx_TEPS, "Option"] = 2 - scaling_table.loc[idx_TEPS, "New Base Unit Value"] = ( - plant_characteristics["Net Electrical Power (MWe)"] - / plant_characteristics['Number of turbines'] - ) - scaling_table.loc[idx_TEPS, "Count per plant"] = plant_characteristics[ - "Number of turbines" + else: + print(f"\t [WARN] Unknown method {method!r} for {account!r} — skipping") + print("DEBUG-2", scaling_table) + # ═════════════════════════════════════════════════════════════════════════ + # Accounts 22–26 + # ═════════════════════════════════════════════════════════════════════════ + print("\nEvaluating accounts 22-26") + + # pd.concat replaces the deprecated DataFrame.append() + df_big = pd.concat([df22, df23, df24, df25, df26]) + if "Method" in df_big.columns: + df_big["Method"] = df_big["Method"].fillna("").astype(str) + + total_thermal = _get_pc(plant_characteristics, + "Total Plant Thermal Power (MWt)", default=0.0) + net_electric = _get_pc(plant_characteristics, + "Net Electrical Power (MWe)", default=0.0) + n_turbines = _get_pc(plant_characteristics, + "Number of turbines", default=1) or 1 + n_rx = _get_pc(plant_characteristics, + "Number of Reactors", default=1) or 1 + + def _apply_method(method_name: 
str, option: int, value, count=None): + """ + Apply option + value to all df_big rows matching method_name. + NaN-indexed rows were already removed, so isin() is safe. + """ + idx = df_big.index[df_big["Method"] == method_name] + if idx.empty: + return + valid = idx[idx.isin(scaling_table.index)] + missing = idx.difference(valid) + if not missing.empty: + print(f" [WARN] {method_name}: not in scaling_table: {missing.tolist()}") + if valid.empty: + return + scaling_table.loc[valid, "Option"] = option + scaling_table.loc[valid, "New Base Unit Value"] = value + if count is not None: + scaling_table.loc[valid, "Count per plant"] = count + + _apply_method("Plant power scaling", 2, total_thermal) + _apply_method("Plant electric power scaling", 2, net_electric) + _apply_method("Turbine electric power scaling", 2, + _safe_divide(net_electric, n_turbines, "Turbine EPS"), + count=n_turbines) + _apply_method("RX power scaling", 2, + _safe_divide(total_thermal, n_rx, "RX power"), + count=n_rx) + _apply_method("Fixed cost", 4, 1) + + def _apply_detailed(method_name: str, option: int, + value_col, count_col, fixed_value=None): + idx = df_big.index[df_big["Method"] == method_name] + valid = idx[idx.isin(scaling_table.index)] + if valid.empty: + return + scaling_table.loc[valid, "Option"] = option + if fixed_value is not None: + scaling_table.loc[valid, "New Base Unit Value"] = fixed_value + elif value_col and value_col in df_big.columns: + scaling_table.loc[valid, "New Base Unit Value"] = df_big.loc[valid, value_col] + if count_col and count_col in df_big.columns: + scaling_table.loc[valid, "Count per plant"] = df_big.loc[valid, count_col] + + _apply_detailed("Detailed", 1, "Value", "Count per plant (DI)") + _apply_detailed("Detailed volume", 1, None, None, fixed_value=bv_225) + _apply_detailed("Detailed pool", 0, "Value", "Count per plant (DI)") + _apply_detailed("Detailed (CE)", 0, "Value", "Count per plant (DI)") + _apply_detailed("Direct cost input", 3, "Direct cost per RX 
(2018 USD)", "Count per plant (DCI)") + + # ── Multipliers & material uncertainty ─────────────────────────────────── + scaling_table = cost_multipliers(scaling_table, scalars_dict, plant_characteristics) + print("DEBUG-3", scaling_table) + scaling_table = material_use_uncertainty(scaling_table, scalars_dict) + print("DEBUG-4", scaling_table) + # ═════════════════════════════════════════════════════════════════════════ + # Compute Scaling Factors + # ════════════════════════════════════ + scaling_table["Scaling Factor"] = 0.0 + print("DEBUG-5", scaling_table) + acc1 = _clean_mask(scaling_table["Option"] == 1, scaling_table.index) + acc2 = _clean_mask(scaling_table["Option"] == 2, scaling_table.index) + acc3 = _clean_mask(scaling_table["Option"] == 3, scaling_table.index) + acc4 = _clean_mask(scaling_table["Option"] == 4, scaling_table.index) + acc0 = scaling_table.index[ + _clean_mask(scaling_table["Option"] == 0, scaling_table.index) ] - idx_RPS = df_big.index[df_big["Method"] == "RX power scaling"] - scaling_table.loc[idx_RPS, "Option"] = 2 - scaling_table.loc[idx_RPS, "New Base Unit Value"] = ( - plant_characteristics["Total Plant Thermal Power (MWt)"] - / plant_characteristics["Number of Reactors"] - ) - scaling_table.loc[idx_RPS, "Count per plant"] = plant_characteristics[ - "Number of Reactors" - ] + def _power_scale(new_col: str, base_col: str, exp_col: str) -> pd.Series: + """(new/base)^exp — base 0 becomes NaN to avoid ZeroDivisionError.""" + new_v = scaling_table[new_col] + base = scaling_table[base_col].replace(0, np.nan) + exp = scaling_table[exp_col] + return (new_v / base) ** exp + + if acc1.any(): + sf = _power_scale("New Base Unit Value", "EEDB Base Unit Value 1", "Option 1 Exponent") + scaling_table.loc[acc1, "Scaling Factor"] = sf.loc[acc1] + + if acc2.any(): + sf = _power_scale("New Base Unit Value", "EEDB Base Unit Value 2", "Option 2 Exponent") + scaling_table.loc[acc2, "Scaling Factor"] = sf.loc[acc2] + + if acc3.any(): + base3 = 
scaling_table.loc[acc3, "EEDB Base Unit Value 3"].replace(0, np.nan) + scaling_table.loc[acc3, "Scaling Factor"] = ( + scaling_table.loc[acc3, "New Base Unit Value"] / base3 + ) - idx_FC = df_big.index[df_big["Method"] == "Fixed cost"] - scaling_table.loc[idx_FC, "Option"] = 4 - scaling_table.loc[idx_FC, "New Base Unit Value"] = 1 + if acc4.any(): + scaling_table.loc[acc4, "Scaling Factor"] = 1.0 - idx_D = df_big.index[df_big["Method"] == "Detailed"] - scaling_table.loc[idx_D, "Option"] = 1 - scaling_table.loc[idx_D, "New Base Unit Value"] = df_big.loc[idx_D, "Value"] - scaling_table.loc[idx_D, "Count per plant"] = df_big.loc[ - idx_D, "Count per plant (DI)" - ] + for account in acc0: + if account not in scaling_table.index: + print(f" [WARN] Option-0 account {account!r} not in index — skipping") + continue - idx_Dv = df_big.index[df_big["Method"] == "Detailed volume"] - scaling_table.loc[idx_Dv, "Option"] = 1 - scaling_table.loc[idx_Dv, "New Base Unit Value"] = bv_225 + raw = scaling_table.loc[account, "Option 0 Formula"] + try: + varz = (raw if isinstance(raw, list) + else [x.strip() + for x in str(raw).replace("[","").replace("]","").split(",")]) + varz = [float(x) for x in varz] + except (ValueError, AttributeError) as exc: + print(f" [WARN] Cannot parse Option 0 Formula for {account!r}: " + f"{raw!r} — {exc}; Scaling Factor=0") + continue - idx_Dp = df_big.index[df_big["Method"] == "Detailed pool"] - scaling_table.loc[idx_Dp, "Option"] = 0 - scaling_table.loc[idx_Dp, "New Base Unit Value"] = df_big.loc[idx_Dp, "Value"] - scaling_table.loc[idx_Dp, "Count per plant"] = df_big.loc[ - idx_Dp, "Count per plant (DI)" - ] + new_val = scaling_table.loc[account, "New Base Unit Value"] + base3 = scaling_table.loc[account, "EEDB Base Unit Value 3"] - idx_Dce = df_big.index[df_big["Method"] == "Detailed (CE)"] - scaling_table.loc[idx_Dce, "Option"] = 0 - scaling_table.loc[idx_Dce, "New Base Unit Value"] = df_big.loc[idx_Dce, "Value"] - scaling_table.loc[idx_Dce, "Count 
per plant"] = df_big.loc[ - idx_Dce, "Count per plant (DI)" - ] + if _is_nan(base3) or base3 == 0: + print(f" [WARN] EEDB Base Unit Value 3 is 0/NaN for {account!r} — skipping") + continue - idx_DCI = df_big.index[df_big["Method"] == "Direct cost input"] - scaling_table.loc[idx_DCI, "Option"] = 3 - scaling_table.loc[idx_DCI, "New Base Unit Value"] = df_big.loc[ - idx_DCI, "Direct cost per RX (2018 USD)" - ] - scaling_table.loc[idx_DCI, "Count per plant"] = df_big.loc[ - idx_DCI, "Count per plant (DCI)" - ] - - idx = df_big.index - scaling_table = cost_multipliers(scaling_table, scalars_dict, plant_characteristics) - scaling_table = material_use_uncertainty(scaling_table, scalars_dict) - - # --------------------------------------------- Scaling/evaluating new costs -----------------------------------# - scaling_table["Scaling Factor"] = 0 - accounts_0 = scaling_table.index[scaling_table["Option"] == 0] - accounts_1 = scaling_table["Option"] == 1 - accounts_2 = scaling_table["Option"] == 2 - accounts_3 = scaling_table["Option"] == 3 - accounts_4 = scaling_table["Option"] == 4 - - scaling_table.loc[accounts_1, "Scaling Factor"] = ( - scaling_table.loc[accounts_1, "New Base Unit Value"] - / scaling_table.loc[accounts_1, "EEDB Base Unit Value 1"] - ) ** scaling_table.loc[accounts_1, "Option 1 Exponent"] - scaling_table.loc[accounts_2, "Scaling Factor"] = ( - scaling_table.loc[accounts_2, "New Base Unit Value"] - / scaling_table.loc[accounts_2, "EEDB Base Unit Value 2"] - ) ** scaling_table.loc[accounts_2, "Option 2 Exponent"] - scaling_table.loc[accounts_3, "Scaling Factor"] = ( - scaling_table.loc[accounts_3, "New Base Unit Value"] - / scaling_table.loc[accounts_3, "EEDB Base Unit Value 3"] - ) - scaling_table.loc[accounts_4, "Scaling Factor"] = 1.0 - - for account in accounts_0: - if not isinstance(scaling_table.loc[account, "Option 0 Formula"], list): - varz = ( - scaling_table.loc[account, "Option 0 Formula"] - .replace("[", "") - .replace("]", "") - .split(",") - ) 
- else: - varz = scaling_table.loc[account, "Option 0 Formula"] - varz = [float(x) for x in varz] if len(varz) == 4: scaling_table.loc[account, "Scaling Factor"] = ( - ( - varz[0] - + varz[1] - * scaling_table.loc[account, "New Base Unit Value"] ** varz[2] - ) - * varz[3] - / scaling_table.loc[account, "EEDB Base Unit Value 3"] + (varz[0] + varz[1] * new_val ** varz[2]) * varz[3] / base3 ) elif len(varz) == 1: - scaling_table.loc[account, "Scaling Factor"] = ( - varz[0] - * scaling_table.loc[account, "New Base Unit Value"] - / scaling_table.loc[account, "EEDB Base Unit Value 3"] - ) + scaling_table.loc[account, "Scaling Factor"] = varz[0] * new_val / base3 + else: + print(f" [WARN] Option 0 Formula unexpected length {len(varz)} " + f"for {account!r} — skipping") - # --------------------------------------------- Accounting for # per plant -------------------------------------# + # ── Count-per-plant multiplier ──────────────────────────────────────────── scaling_table["Scaling Factor"] *= scaling_table["Count per plant"] - # scaling_table.to_csv('./out/out_'+ fname[10:-5] +'_scaling_table.csv') - - # --------------------------------------------- Add interior concrete to SC1 concrete ------------------------------------# - plant_characteristics["sc1_concrete"] += ( - scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / 1.1 ** 3 - ) # 8000 CY of interior concrete in EEDB - concrete += ( - scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / 1.1 ** 3 - ) # 8000 CY of interior concrete in EEDB - print("Concrete total: {:.0F}".format(concrete)) - print("SC1 Concrete: {:.0F}".format(plant_characteristics["sc1_concrete"])) - # print(scaling_table.loc['A.212.140', 'Scaling Factor']*8000/1.1**3) - - return scaling_table, plant_characteristics + + # ── Interior concrete correction (A.212.140) ────────────────────────────── + if "A.212.140" in scaling_table.index: + correction = scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / (1.1 ** 3) + 
plant_characteristics["sc1_concrete"] += correction + concrete += correction + else: + print(" [WARN] A.212.140 not in scaling_table — concrete correction skipped") + + print(f"Concrete total: {concrete:.0f}") + print(f"SC1 Concrete: {plant_characteristics['sc1_concrete']:.0f}") + + return scaling_table, plant_characteristics \ No newline at end of file From 8da2d32c6644955a04a13fe0a3e222d008f74c6a Mon Sep 17 00:00:00 2001 From: Yuri Drobyshev Date: Fri, 10 Apr 2026 12:36:53 +0400 Subject: [PATCH 2/3] comment fill_nan --- ncet/fill_scaling_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ncet/fill_scaling_table.py b/ncet/fill_scaling_table.py index ff77cbb..cb6f733 100644 --- a/ncet/fill_scaling_table.py +++ b/ncet/fill_scaling_table.py @@ -166,7 +166,7 @@ def fill_scaling_table(path, fname, base, scalars_dict, scaling_table=None): scaling_table.set_index("Account", inplace=True) # Drop NaN-indexed rows immediately — root cause of the ValueError. - scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table") + #scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table") scaling_table["Option"] = 1 scaling_table["New Base Unit Value"] = 0.0 From 43984df815a60785d189e4d8cb35f3f230a22430 Mon Sep 17 00:00:00 2001 From: Yuri Drobyshev Date: Tue, 12 May 2026 14:46:02 +0300 Subject: [PATCH 3/3] adapted for calculation without turbine --- cost_sensitivity.py | 4 ++-- ncet/get_indirect_costs.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cost_sensitivity.py b/cost_sensitivity.py index 9f84061..5afb8f0 100644 --- a/cost_sensitivity.py +++ b/cost_sensitivity.py @@ -362,7 +362,7 @@ def run_ncet( plant_fname = "inputfile_" + plant + ".xlsx" param_fname = "input_scaling_exponents.xlsx" - orders = 10 + orders = 1 mc_runs = 1 # choose 1 to run the reference values # cProfile.run('run_ncet(plant, path, orders, plant_fname, param_fname, BASIS_FNAME, mc_runs=mc_runs, make_building_table=True, save_all=True)') @@ 
-437,4 +437,4 @@ def run_ncet( # dfNP.to_csv('./out/' + plant + '/new' + plant + '_' + case + '_' + str(i) + '.csv') - # scheduler_table.to_csv('./out/' + plant + '/' + plant + '_scheduler_table.csv') + # scheduler_table.to_csv('./out/' + plant + '/' + plant + '_scheduler_table.csv') \ No newline at end of file diff --git a/ncet/get_indirect_costs.py b/ncet/get_indirect_costs.py index 96fe032..738f4a4 100644 --- a/ncet/get_indirect_costs.py +++ b/ncet/get_indirect_costs.py @@ -158,7 +158,7 @@ def get_indirect_costs(dfNewPlant, plant_characteristics, learning_rate, scalars np_indirect_cost["Account Description"] = "Indirect Costs" np_indirect_cost["Subcategories"] = 1 - dfNewPlant = dfNewPlant.append(pd.Series(np_indirect_cost, name="A.9")) + dfNewPlant = dfNewPlant._append(pd.Series(np_indirect_cost, name="A.9")) # #----------------- Divide costs among correct overrun accounts ---------------- # for col in ['Site Labor Hours', 'Site Labor Cost', 'Site Material Cost', 'Factory Equipment Cost']: