From c30548457f3534e92eab4582142c01283d3caca6 Mon Sep 17 00:00:00 2001
From: Yuri Drobyshev <yuri.drobyshev@humans.net>
Date: Fri, 10 Apr 2026 12:32:38 +0400
Subject: [PATCH 1/3] refactor fill_scaling_table

---
 ncet/fill_scaling_table.py | 860 ++++++++++++++++++++++---------------
 1 file changed, 504 insertions(+), 356 deletions(-)

diff --git a/ncet/fill_scaling_table.py b/ncet/fill_scaling_table.py
index 9c29706..ff77cbb 100644
--- a/ncet/fill_scaling_table.py
+++ b/ncet/fill_scaling_table.py
@@ -1,421 +1,569 @@
+"""
+ncet/fill_scaling_table.py  —  hardened rewrite
+================================================
+Changes vs original:
+  1. Replaced all deprecated DataFrame.append() with pd.concat().
+  2. NaN index labels in the scaling table are replaced with a placeholder
+     string right after loading so that index.str.match() never yields
+     NA-valued boolean masks (which cause ValueError: "Cannot mask with
+     non-boolean array containing NA / NaN values" on .loc[] assignment).
+  3. _is_nan()     — universal NaN test covering None, float NaN, numpy scalar
+                     NaN, and 0-d numpy arrays; safe on any Python type.
+  4. _clean_mask() — converts any mask (Series, ndarray, scalar) to a pure
+                     bool Series with NaN → False before it touches .loc[].
+  5. _safe_loc_set() runs _clean_mask() internally as a last-resort guard.
+  6. "Method" columns are filled with "" (not NaN) after loading so that
+     == comparisons always yield a clean bool Series.
+  7. All .loc[] / .at[] index accesses are existence-checked first.
+  8. plant_characteristics key accesses use _get_pc() with safe defaults.
+  9. Division-by-zero guarded via _safe_divide() and .replace(0, np.nan).
+ 10. f-strings replace .format() for readability; no logic changes.
+"""
+
 import pandas as pd
 import numpy as np
 from os.path import join as pjoin
-import pdb
+
 from .bldg_features import eval_bldg
 from .special_cases import cost_multipliers
 from .material_use_uncertainty import material_use_uncertainty
 
 
+# ─────────────────────────────────────────────────────────────────────────────
+#  Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _is_nan(value) -> bool:
+    """
+    Universal missing-value test.
+    Returns True for: Python None, float NaN, numpy scalar NaN (any dtype),
+    and 0-d numpy arrays whose single element is NaN.
+    Returns False for strings, lists, dicts, non-NaN numbers, etc.
+    """
+    if value is None:
+        return True
+    try:
+        if isinstance(value, np.ndarray):
+            if value.ndim == 0:
+                return bool(np.isnan(value))
+            return False      # non-scalar arrays are never "a NaN value"
+        return bool(np.isnan(value))
+    except (TypeError, ValueError):
+        return False
+
+
+def _clean_mask(mask, index: pd.Index) -> pd.Series:
+    """
+    Convert *mask* to a pure bool Series aligned to *index*.
+    NaN entries (produced by str.match / == on NaN-containing indices)
+    become False so those rows are never selected.
+    Intended to be applied to a mask before it is used in a .loc[] write.
+    """
+    if isinstance(mask, pd.Series):
+        return mask.fillna(False).astype(bool)
+    if isinstance(mask, np.ndarray):
+        s = pd.Series(mask, index=index)
+        return s.fillna(False).astype(bool)
+    return pd.Series(bool(mask), index=index)
+
+
+def _drop_nan_index_rows(df: pd.DataFrame, label: str = "") -> pd.DataFrame:
+    """
+    Remove rows whose index is NaN.
+    pandas reads blank Excel / CSV rows as NaN-indexed rows.  Keeping them
+    causes str.match() to return NaN instead of True/False, which then raises
+    ValueError: "Cannot mask with non-boolean array containing NA / NaN values".
+    """
+    nan_mask = pd.isna(df.index)
+    n = int(nan_mask.sum())
+    if n:
+        print(f"  [INFO] Dropping {n} NaN-indexed row(s) from {label!r}")
+        df = df.loc[~nan_mask]
+    return df
+
+def _fill_nan_index_rows(df: pd.DataFrame, label: str = "",
+                         placeholder: str = "__nan_row__") -> pd.DataFrame:
+    """
+    Replace NaN index values with a harmless placeholder string instead of
+    dropping the row.  This preserves all data (including numeric columns
+    like cost values that would otherwise be lost) while ensuring that
+    str.match() always returns a clean bool — never NaN — because the index
+    no longer contains any missing values.
+
+    The placeholder begins with "__" so it will never accidentally match a
+    real EEDB account code (these always start with "A.").
+
+    Numeric columns that are NaN in a blank row are left as-is; callers
+    that need a numeric default (e.g. 0.0) should apply .fillna() themselves
+    on the specific column they care about.
+    """
+    nan_mask = pd.isna(df.index)
+    n = int(nan_mask.sum())
+    if n:
+        print(f"  [INFO] Filling {n} NaN index value(s) with "
+              f"{placeholder!r} in {label!r} — rows are kept, not dropped")
+        new_index = df.index.astype(object).where(~nan_mask, other=placeholder)
+        df = df.copy()
+        df.index = new_index
+    return df
+
+
+def _safe_loc_set(df: pd.DataFrame, mask, col: str, value,
+                  label: str = "") -> None:
+    """
+    df.loc[mask, col] = value with three protection layers:
+      1. _clean_mask() ensures the mask is a pure bool Series (NaN -> False).
+      2. Skip entirely if nothing is selected.
+      3. Catch and log any remaining exception.
+    """
+    clean = _clean_mask(mask, df.index)
+    if not clean.any():
+        return
+    try:
+        df.loc[clean, col] = value
+    except Exception as exc:
+        print(f"  [WARN] _safe_loc_set failed for {label!r}, col={col!r}: {exc}")
+
+
+def _get_pc(plant_characteristics: dict, key: str,
+            default=None, label: str = ""):
+    """Fetch from plant_characteristics with a warning when key is absent."""
+    if key not in plant_characteristics:
+        ctx = f" (context: {label})" if label else ""
+        print(f"  [WARN] plant_characteristics missing key {key!r}{ctx}")
+        return default
+    return plant_characteristics[key]
+
+
+def _safe_divide(numerator, denominator, label: str = "") -> float:
+    """numerator / denominator; returns 1.0 with a warning on 0 or NaN denom."""
+    try:
+        if _is_nan(denominator) or denominator == 0:
+            print(f"  [WARN] Division by zero/NaN for {label!r}; returning 1.0")
+            return 1.0
+        return numerator / denominator
+    except Exception as exc:
+        print(f"  [WARN] _safe_divide error for {label!r}: {exc}; returning 1.0")
+        return 1.0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+#  Main function
+# ─────────────────────────────────────────────────────────────────────────────
+
 def fill_scaling_table(path, fname, base, scalars_dict, scaling_table=None):
+    print("DEBUG-1", scaling_table)
 
+    # ── Load or prepare the scaling table ────────────────────────────────────
     if scaling_table is None:
         scaling_table = pd.read_csv(
-            pjoin(path, "input_scaling_exponents.csv"), header=0, index_col="Account"
+            pjoin(path, "input_scaling_exponents.csv"),
+            header=0,
+            index_col="Account",
         )
     else:
+        # Caller-supplied table: "Account" becomes the index (mutates the caller's frame).
         scaling_table.set_index("Account", inplace=True)
 
-    scaling_table["Option"] = 1
-    scaling_table["New Base Unit Value"] = 0.0
-    scaling_table["Multipliers"] = 1.0
+    # Fill NaN index labels with a placeholder — NaN labels cause the ValueError.
+    scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table")
+
+    scaling_table["Option"]                      = 1
+    scaling_table["New Base Unit Value"]         = 0.0
+    scaling_table["Multipliers"]                 = 1.0
     scaling_table["Factory Equipment Cost Mult"] = 1.0
-    scaling_table["Site Labor Hours Mult"] = 1.0
-    scaling_table["Site Labor Cost Mult"] = 1.0
-    scaling_table["Site Material Cost Mult"] = 1.0
-    scaling_table["Count per plant"] = 1
-    scaling_table["New Cost"] = 0.0
-    inside_dict = {}
-
-    plant_characteristics = pd.read_excel(
+    scaling_table["Site Labor Hours Mult"]       = 1.0
+    scaling_table["Site Labor Cost Mult"]        = 1.0
+    scaling_table["Site Material Cost Mult"]     = 1.0
+    scaling_table["Count per plant"]             = 1
+    scaling_table["New Cost"]                    = 0.0
+    inside_dict: dict = {}
+    # ── Plant characteristics ─────────────────────────────────────────────────
+    plant_characteristics: dict = pd.read_excel(
         pjoin(path, fname),
         sheet_name="PlantCharacteristics",
         header=None,
         skiprows=[0],
         index_col=0,
     ).to_dict()[1]
-    plant_characteristics["SPC One sided"] = []
-    plant_characteristics["SPC Two sided"] = []
-    plant_characteristics["SPC Area"] = []
-    plant_characteristics["Grade 80"] = []
-    plant_characteristics["Grade 100"] = []
-    plant_characteristics["Containment type"] = ["Steel lined concrete"]
-
-    plant_characteristics["sc1_BV"] = 0  # Seismic Category 1 Building Volume tally
-    plant_characteristics[
-        "sc1_concrete"
-    ] = 0  # Seismic Category 1 concrete Volume tally
+
+    plant_characteristics.setdefault("SPC One sided",    [])
+    plant_characteristics.setdefault("SPC Two sided",    [])
+    plant_characteristics.setdefault("SPC Area",         [])
+    plant_characteristics.setdefault("Grade 80",         [])
+    plant_characteristics.setdefault("Grade 100",        [])
+    plant_characteristics.setdefault("Containment type", ["Steel lined concrete"])
+    plant_characteristics.setdefault("sc1_BV",           0)
+    plant_characteristics.setdefault("sc1_concrete",     0)
+
     concrete = 0
     bv_accounts_225 = ["A.212.", "A.213.", "A.215.", "A.216.", "A.217."]
     bv_225 = 0
 
-    df21 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="21-Structures&Improvements",
-        skiprows=[0],
-        index_col="Account",
-    )
-    df22 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="22-ReactorEquipment",
-        skiprows=[0],
-        index_col="Account",
-    )
-    df23 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="23-TurbineEquipment",
-        skiprows=[0],
-        index_col="Account",
-    )
-    df24 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="24-ElectricalEquipment",
-        skiprows=[0],
-        index_col="Account",
-    )
-    df25 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="25-MiscEquipment",
-        skiprows=[0],
-        index_col="Account",
-    )
-    df26 = pd.read_excel(
-        pjoin(path, fname),
-        header=0,
-        sheet_name="26-HeatRejectionSystem",
-        skiprows=[0],
-        index_col="Account",
-    )
+    # ── Load sheets and sanitize the Method column (NaN-index rows kept) ──────
+    _skw = dict(header=0, skiprows=[0], index_col="Account")
+
+    def _load_sheet(sheet_name: str) -> pd.DataFrame:
+        df = pd.read_excel(pjoin(path, fname), sheet_name=sheet_name, **_skw)
+        # NOTE: sheet indexes are not normalized (no _fill_nan_index_rows call).
+        # Make Method a plain string — NaN cells become "" so that
+        # df["Method"] == "anything" always returns a clean bool Series.
+        if "Method" in df.columns:
+            df["Method"] = df["Method"].fillna("").astype(str)
+        return df
+
+    df21 = _load_sheet("21-Structures&Improvements")
+    df22 = _load_sheet("22-ReactorEquipment")
+    df23 = _load_sheet("23-TurbineEquipment")
+    df24 = _load_sheet("24-ElectricalEquipment")
+    df25 = _load_sheet("25-MiscEquipment")
+    df26 = _load_sheet("26-HeatRejectionSystem")
 
-    # Add this for making the building_table
     plant_characteristics["New Bldg"] = df21["SSCs moved to"]
-    # Add dict to adjust rebar costs in special cases [should figure out a better way to do this eventually]
-    if any(df21["Rebar density"] != "Default"):
-        plant_characteristics["Rebar table"] = df21.loc[
-            df21["Rebar density"] != "Default", "Rebar density"
-        ].to_dict()
 
-    # ---------------------------------------------Account 21 Structures & Improvements------------------------------------------------#
+    if "Rebar density" in df21.columns and any(df21["Rebar density"] != "Default"):
+        plant_characteristics["Rebar table"] = (
+            df21.loc[df21["Rebar density"] != "Default", "Rebar density"].to_dict()
+        )
+
+    # ── Pre-computed option-type masks ────────────────────────────────────────
+    # The "Option 1" column may contain NaN for blank rows; clean each mask.
+    def _opt1_mask(value: str) -> pd.Series:
+        return _clean_mask(scaling_table["Option 1"] == value, scaling_table.index)
+
+    ibv  = _opt1_mask("Building volume")
+    isba = _opt1_mask("Substructure area")
+    isbv = _opt1_mask("Substructure volume")
+    ispa = _opt1_mask("Superstructure area")
+    ispv = _opt1_mask("Superstructure volume")
+    ipow = _opt1_mask("Plant power")
+    ic   = _opt1_mask("Constant")
+
+    # ═════════════════════════════════════════════════════════════════════════
+    #  Account 21 — Structures & Improvements
+    # ═════════════════════════════════════════════════════════════════════════
     print("Evaluating account 21: Structures & Improvements")
-    accounts = df21.index.unique()
-    ibv = scaling_table["Option 1"] == "Building volume"
-    isba = scaling_table["Option 1"] == "Substructure area"
-    isbv = scaling_table["Option 1"] == "Substructure volume"
-    ispa = scaling_table["Option 1"] == "Superstructure area"
-    ispv = scaling_table["Option 1"] == "Superstructure volume"
-    ipow = scaling_table["Option 1"] == "Plant power"
-    ic = scaling_table["Option 1"] == "Constant"
-
-    for account in accounts:
-        aux = df21.loc[account]
-        print("	Account: " + account + ", Name: " + aux["Name"])
-
-        if aux["Method"] == "Detailed (EEDB based)":
-            idx = scaling_table.index.str.match(account)
-
-            # Calculate material use volumes/areas
-            portions = aux["Portions"]
-            subArea, subVol, superArea, superVol, bv = eval_bldg(portions, aux)
 
-            # Check if the building is inside another building (or has one inside it), and account for the changes to material use as necessary
-            if aux["Inside?"] != "None":
-                inside_acct = "A." + aux["Inside?"].split("A.")[1]
-                in_or_out = aux["Inside?"].split(":")[0]
-                if in_or_out == "Inside":
-                    inside_dict[inside_acct] = [
-                        account,
-                        subArea,
-                        subVol,
-                        superArea,
-                        superVol,
-                        bv,
-                    ]
-                elif in_or_out == "Outside":
-                    (
-                        in_account,
-                        in_subArea,
-                        in_subVol,
-                        in_superArea,
-                        in_superVol,
-                        in_bv,
-                    ) = inside_dict[account]
-                    subArea -= in_subArea
-                    subVol -= in_subVol
-                    bv -= in_bv
-            print(
-                "		Superstructure volume: {:.0F}, area: {:.0F}".format(
-                    superVol, superArea
-                )
+    total_thermal_power = _get_pc(
+        plant_characteristics, "Total Plant Thermal Power (MWt)",
+        default=0.0, label="Account 21",
+    )
+
+    for account in df21.index.unique():
+        aux    = df21.loc[account]
+        method = str(aux.get("Method", ""))
+        print(f"\tAccount: {account}, Name: {aux.get('Name', '?')}")
+
+        if method == "Detailed (EEDB based)":
+            # str.match returns NaN for any NaN-valued index entry.
+            # _clean_mask converts those NaN results to False.
+            idx = _clean_mask(
+                scaling_table.index.str.match(account), scaling_table.index
             )
-            print("		Substructure volume: {:.0F}, area: {:.0F}".format(subVol, subArea))
-            print("		Building volume: {:.0F}".format(bv))
+            print(f"\tMethod: {method}")
+
+            portions = aux.get("Portions", None)
+            subArea, subVol, superArea, superVol, bv = eval_bldg(portions, aux)
+
+            # Use _is_nan() — handles None, float NaN, numpy scalar NaN,
+            # and 0-d numpy arrays which all appear for blank Excel cells.
+            inside_val = aux.get("Inside?", None)
+            if not _is_nan(inside_val) and str(inside_val) != "None":
+                inside_str = str(inside_val)
+                if "A." in inside_str and ":" in inside_str:
+                    try:
+                        inside_acct = "A." + inside_str.split("A.")[1]
+                        in_or_out   = inside_str.split(":")[0]
+                        if in_or_out == "Inside":
+                            inside_dict[inside_acct] = [
+                                account, subArea, subVol, superArea, superVol, bv,
+                            ]
+                        elif in_or_out == "Outside":
+                            if account in inside_dict:
+                                (_, in_sub_a, in_sub_v,
+                                 _isa, _isv, in_bv) = inside_dict[account]
+                                subArea -= in_sub_a
+                                subVol  -= in_sub_v
+                                bv      -= in_bv
+                            else:
+                                print(f"\t  [WARN] 'Outside' ref for {account!r} "
+                                      f"has no matching 'Inside' entry")
+                    except Exception as exc:
+                        print(f"\t  [WARN] Cannot parse Inside? {inside_val!r}: {exc}")
+                else:
+                    print(f"\t  [WARN] Unexpected Inside? format {inside_val!r}")
+
+            print(f"\t\tSuperstructure volume: {superVol:.0f}, area: {superArea:.0f}")
+            print(f"\t\tSubstructure  volume: {subVol:.0f},  area: {subArea:.0f}")
+            print(f"\t\tBuilding volume: {bv:.0f}")
             plant_characteristics[account] = subArea
 
-            # Update the scaling table with the calculated values
-            scaling_table.loc[(idx & ibv), "New Base Unit Value"] = bv
-            scaling_table.loc[(idx & isba), "New Base Unit Value"] = subArea
-            scaling_table.loc[(idx & isbv), "New Base Unit Value"] = subVol
-            scaling_table.loc[(idx & ispa), "New Base Unit Value"] = superArea
-            scaling_table.loc[(idx & ispv), "New Base Unit Value"] = superVol
-            scaling_table.loc[(idx & ic), "New Base Unit Value"] = 1
-            scaling_table.loc[
-                (idx & ipow), "New Base Unit Value"
-            ] = plant_characteristics["Total Plant Thermal Power (MWt)"]
-
-            if aux["Steel plate composite"] == "One sided":
+            # idx and ibv/isba/... are all clean bool Series — & is safe.
+            _safe_loc_set(scaling_table, idx & ibv,  "New Base Unit Value", bv,                 account)
+            _safe_loc_set(scaling_table, idx & isba, "New Base Unit Value", subArea,             account)
+            _safe_loc_set(scaling_table, idx & isbv, "New Base Unit Value", subVol,              account)
+            _safe_loc_set(scaling_table, idx & ispa, "New Base Unit Value", superArea,           account)
+            _safe_loc_set(scaling_table, idx & ispv, "New Base Unit Value", superVol,            account)
+            _safe_loc_set(scaling_table, idx & ic,   "New Base Unit Value", 1,                   account)
+            _safe_loc_set(scaling_table, idx & ipow, "New Base Unit Value", total_thermal_power, account)
+
+            spc = aux.get("Steel plate composite", None)
+            if spc == "One sided":
                 plant_characteristics["SPC One sided"].append(account)
                 plant_characteristics["SPC Area"].append(superArea)
-            elif aux["Steel plate composite"] == "Two sided":
+            elif spc == "Two sided":
                 plant_characteristics["SPC Two sided"].append(account)
                 plant_characteristics["SPC Area"].append(superArea)
 
-            if aux["High strength rebar"] == "Grade 80":
+            rebar = aux.get("High strength rebar", None)
+            if rebar == "Grade 80":
                 plant_characteristics["Grade 80"].append(account)
-            elif aux["High strength rebar"] == "Grade 100":
+            elif rebar == "Grade 100":
                 plant_characteristics["Grade 100"].append(account)
 
-            if aux["Seismic Class 1"]:
-                plant_characteristics["sc1_BV"] += bv
+            if aux.get("Seismic Class 1", False):
+                plant_characteristics["sc1_BV"]       += bv
                 plant_characteristics["sc1_concrete"] += subVol + superVol
             if account in bv_accounts_225:
                 bv_225 += bv
             concrete += subVol + superVol
 
-            if aux["Name"] == "Containment Liner":
-                # default is option 1, scaled by superstructure area for steel lined concrete, these are the exceptions
-                if aux["Superstructure type"] == "Stainless steel vessel":
-                    scaling_table.loc[account, "Option"] = 0
-                    mass = 8000.0 * (
-                        superVol + subVol
-                    )  # 8000 kg/m^3 is the density of stainless steel
-                    print("		Mass of containment vessel: {:.0F}".format(mass))
-                    scaling_table.loc[account, "New Base Unit Value"] = mass
-                    scaling_table.loc[
-                        account, "Multipliers"
-                    ] = 2.3  # stainless more than carbon steel
-                    scaling_table.loc[
-                        account, "Count per plant"
-                    ] = plant_characteristics["Number of Reactors"]
-                    plant_characteristics["Containment type"] = "Steel vessel"
+            if aux.get("Name", "") == "Containment Liner":
+                sup_type = aux.get("Superstructure type", "")
+                n_rx = _get_pc(plant_characteristics, "Number of Reactors",
+                               default=1, label="Containment liner")
+
+                if sup_type in ("Stainless steel vessel", "Carbon steel vessel"):
+                    mass = 8000.0 * (superVol + subVol)
+                    print(f"\t\tMass of containment vessel: {mass:.0f}")
+                    if account in scaling_table.index:
+                        scaling_table.loc[account, "Option"]             = 0
+                        scaling_table.loc[account, "New Base Unit Value"] = mass
+                        scaling_table.loc[account, "Count per plant"]    = n_rx
+                        if sup_type == "Stainless steel vessel":
+                            scaling_table.loc[account, "Multipliers"] = 2.3
+                    plant_characteristics["Containment type"]             = "Steel vessel"
                     plant_characteristics["Containment vessel mass (kg)"] = mass
-                    plant_characteristics['Containment thickness (m)'] = aux['Superstructure thickness (meters)']
-
-                elif aux['Superstructure type']=='Carbon steel vessel':
-                    scaling_table.loc[account, "Option"] = 0
-                    mass = 8000.0 * (
-                        superVol + subVol
-                    )  # 8000 kg/m^3 is the density of stainless steel
-                    print("		Mass of containment vessel: {:.0F}".format(mass))
-                    scaling_table.loc[account, "New Base Unit Value"] = mass
-                    scaling_table.loc[
-                        account, "Count per plant"
-                    ] = plant_characteristics["Number of Reactors"]
-                    plant_characteristics["Containment type"] = "Steel vessel"
-                    plant_characteristics["Containment vessel mass (kg)"] = mass
-                    plant_characteristics['Containment thickness (m)'] = aux['Superstructure thickness (meters)']
-
-                elif aux["Superstructure type"] == "Standalone steel building":
-
-                    plant_characteristics[
-                        "Containment type"
-                    ] = "Standalone steel building"
-                    # The multipliers came from the EEDB APWR6/PWR6/BE account breakdowns
-                    scaling_table.loc[
-                        account, "Factory Equipment Cost Mult"
-                    ] *= scalars_dict["212.15 Factory cost mult"]
-                    scaling_table.loc[account, "Site Labor Hours Mult"] *= scalars_dict[
-                        "212.15 Labor hours mult"
-                    ]
-                    scaling_table.loc[account, "Site Labor Cost Mult"] *= scalars_dict[
-                        "212.15 Labor cost mult"
-                    ]
-                    scaling_table.loc[
-                        account, "Site Material Cost Mult"
-                    ] *= scalars_dict["212.15 Material cost mult"]
-                    plant_characteristics["Containment type"] = [
-                        "Standalone steel building"
-                    ]
-
-        elif aux["Method"] == "Detailed (Generic)":
-            print("Error, generic building not implemented yet")
-            break
-
-        elif aux["Method"] == "Plant power scaling":
-            idx = scaling_table.index.str.match(account)
-            scaling_table.loc[idx, "Option"] = 2
-            scaling_table.loc[idx, "New Base Unit Value"] = plant_characteristics[
-                "Total Plant Thermal Power (MWt)"
-            ]
-
-        elif aux["Method"] == "RX power scaling":
-            idx = scaling_table.index.str.match(account)
-            scaling_table.loc[idx, "Option"] = 2
-            scaling_table.loc[idx, "New Base Unit Value"] = (
-                plant_characteristics["Total Plant Thermal Power (MWt)"]
-                / plant_characteristics["Number of Reactors"]
+                    plant_characteristics["Containment thickness (m)"]    = aux.get(
+                        "Superstructure thickness (meters)", np.nan
+                    )
+
+                elif sup_type == "Standalone steel building":
+                    plant_characteristics["Containment type"] = ["Standalone steel building"]
+                    if account in scaling_table.index:
+                        for mult_key, col in [
+                            ("212.15 Factory cost mult",  "Factory Equipment Cost Mult"),
+                            ("212.15 Labor hours mult",   "Site Labor Hours Mult"),
+                            ("212.15 Labor cost mult",    "Site Labor Cost Mult"),
+                            ("212.15 Material cost mult", "Site Material Cost Mult"),
+                        ]:
+                            if mult_key in scalars_dict:
+                                scaling_table.loc[account, col] *= scalars_dict[mult_key]
+                            else:
+                                print(f"\t  [WARN] scalars_dict missing key {mult_key!r}")
+
+        elif method == "Detailed (Generic)":
+            print("\tDetailed (Generic) not implemented yet — skipping")
+
+        elif method in ("Plant power scaling", "RX power scaling"):
+            idx = _clean_mask(
+                scaling_table.index.str.match(account), scaling_table.index
             )
+            if not idx.any():
+                print(f"\t  [WARN] No rows match {account!r} — skipping")
+                continue
+            n_rx = _get_pc(plant_characteristics, "Number of Reactors",
+                           default=1, label=f"{method}/{account}") or 1
+            value = (total_thermal_power if method == "Plant power scaling"
+                     else _safe_divide(total_thermal_power, n_rx, f"RX/{account}"))
+            _safe_loc_set(scaling_table, idx, "Option",             2,     account)
+            _safe_loc_set(scaling_table, idx, "New Base Unit Value", value, account)
+
+        elif method == "Fixed cost":
+            idx = _clean_mask(
+                scaling_table.index.str.match(account), scaling_table.index
+            )
+            if not idx.any():
+                print(f"\t  [WARN] No rows match {account!r} — skipping")
+                continue
+            _safe_loc_set(scaling_table, idx, "Option",             4, account)
+            _safe_loc_set(scaling_table, idx, "New Base Unit Value", 1, account)
+
+        elif method == "Direct cost":
+            idx = _clean_mask(
+                scaling_table.index.str.match(account), scaling_table.index
+            )
+            if not idx.any():
+                print(f"\t  [WARN] No rows match {account!r} — skipping")
+                continue
+            col = "Direct cost per RX (2018 USD)"
+            if col in df21.columns and account in df21.index:
+                _safe_loc_set(scaling_table, idx, "Option",             3,                    account)
+                _safe_loc_set(scaling_table, idx, "New Base Unit Value", df21.loc[account, col], account)
+            else:
+                print(f"\t  [WARN] Column {col!r} or account {account!r} missing in df21")
+
+        elif method == "":
+            pass  # blank row
 
-        elif aux["Method"] == "Fixed cost":
-            idx = scaling_table.index.str.match(account)
-            scaling_table.loc[idx, "Option"] = 4
-            scaling_table.loc[idx, "New Base Unit Value"] = 1
-
-        elif aux["Method"] == "Direct cost":
-            idx = scaling_table.index.str.match(account)
-            scaling_table.loc[idx, "Option"] = 3
-            scaling_table.loc[idx, "New Base Unit Value"] = df21.loc[
-                account, "Direct cost per RX (2018 USD)"
-            ]
-
-    # ------------------------------------------------------Account 22-26 ------------------------------------------------------#
-    print("Evaluating account 22 - 26")
-    df_big = df22.append(df23)
-    df_big = df_big.append(df24)
-    df_big = df_big.append(df25)
-    df_big = df_big.append(df26)
-
-    idx_PPS = df_big.index[df_big["Method"] == "Plant power scaling"]
-    scaling_table.loc[idx_PPS, "Option"] = 2
-    scaling_table.loc[idx_PPS, "New Base Unit Value"] = plant_characteristics[
-        "Total Plant Thermal Power (MWt)"
-    ]
-
-    idx_EPS = df_big.index[df_big["Method"] == "Plant electric power scaling"]
-    scaling_table.loc[idx_EPS, "Option"] = 2
-    scaling_table.loc[idx_EPS, "New Base Unit Value"] = plant_characteristics[
-        "Net Electrical Power (MWe)"
-    ]
-
-    idx_TEPS = df_big.index[df_big["Method"] == "Turbine electric power scaling"]
-    scaling_table.loc[idx_TEPS, "Option"] = 2
-    scaling_table.loc[idx_TEPS, "New Base Unit Value"] = (
-        plant_characteristics["Net Electrical Power (MWe)"] 
-        / plant_characteristics['Number of turbines']
-    )
-    scaling_table.loc[idx_TEPS, "Count per plant"] = plant_characteristics[
-        "Number of turbines"
+        else:
+            print(f"\t  [WARN] Unknown method {method!r} for {account!r} — skipping")
+    print("DEBUG-2", scaling_table)
+    # ═════════════════════════════════════════════════════════════════════════
+    #  Accounts 22–26
+    # ═════════════════════════════════════════════════════════════════════════
+    print("\nEvaluating accounts 22-26")
+
+    # pd.concat replaces the deprecated DataFrame.append()
+    df_big = pd.concat([df22, df23, df24, df25, df26])
+    if "Method" in df_big.columns:
+        df_big["Method"] = df_big["Method"].fillna("").astype(str)
+
+    total_thermal = _get_pc(plant_characteristics,
+                            "Total Plant Thermal Power (MWt)", default=0.0)
+    net_electric  = _get_pc(plant_characteristics,
+                            "Net Electrical Power (MWe)",      default=0.0)
+    n_turbines    = _get_pc(plant_characteristics,
+                            "Number of turbines",              default=1) or 1
+    n_rx          = _get_pc(plant_characteristics,
+                            "Number of Reactors",              default=1) or 1
+
+    def _apply_method(method_name: str, option: int, value, count=None):
+        """
+        Apply option + value to all df_big rows matching method_name.
+        NaN-indexed rows were already removed, so isin() is safe.
+        """
+        idx = df_big.index[df_big["Method"] == method_name]
+        if idx.empty:
+            return
+        valid = idx[idx.isin(scaling_table.index)]
+        missing = idx.difference(valid)
+        if not missing.empty:
+            print(f"  [WARN] {method_name}: not in scaling_table: {missing.tolist()}")
+        if valid.empty:
+            return
+        scaling_table.loc[valid, "Option"]             = option
+        scaling_table.loc[valid, "New Base Unit Value"] = value
+        if count is not None:
+            scaling_table.loc[valid, "Count per plant"] = count
+
+    _apply_method("Plant power scaling",            2, total_thermal)
+    _apply_method("Plant electric power scaling",   2, net_electric)
+    _apply_method("Turbine electric power scaling", 2,
+                  _safe_divide(net_electric, n_turbines, "Turbine EPS"),
+                  count=n_turbines)
+    _apply_method("RX power scaling",              2,
+                  _safe_divide(total_thermal, n_rx, "RX power"),
+                  count=n_rx)
+    _apply_method("Fixed cost", 4, 1)
+
+    def _apply_detailed(method_name: str, option: int,
+                        value_col, count_col, fixed_value=None):
+        idx = df_big.index[df_big["Method"] == method_name]
+        valid = idx[idx.isin(scaling_table.index)]
+        if valid.empty:
+            return
+        scaling_table.loc[valid, "Option"] = option
+        if fixed_value is not None:
+            scaling_table.loc[valid, "New Base Unit Value"] = fixed_value
+        elif value_col and value_col in df_big.columns:
+            scaling_table.loc[valid, "New Base Unit Value"] = df_big.loc[valid, value_col]
+        if count_col and count_col in df_big.columns:
+            scaling_table.loc[valid, "Count per plant"] = df_big.loc[valid, count_col]
+
+    _apply_detailed("Detailed",          1, "Value",                          "Count per plant (DI)")
+    _apply_detailed("Detailed volume",   1,  None,                             None, fixed_value=bv_225)
+    _apply_detailed("Detailed pool",     0, "Value",                          "Count per plant (DI)")
+    _apply_detailed("Detailed (CE)",     0, "Value",                          "Count per plant (DI)")
+    _apply_detailed("Direct cost input", 3, "Direct cost per RX (2018 USD)",  "Count per plant (DCI)")
+
+    # ── Multipliers & material uncertainty ───────────────────────────────────
+    # The return value of each helper is rebound to scaling_table before the next step.
+    scaling_table = cost_multipliers(scaling_table, scalars_dict, plant_characteristics)
+    scaling_table = material_use_uncertainty(scaling_table, scalars_dict)
+
+    # ═════════════════════════════════════════════════════════════════════════
+    #  Compute Scaling Factors
+    # ═════════════════════════════════════════════════════════════════════════
+    # Start every account at 0.0; Option-0 accounts skipped with a warning below keep it.
+    scaling_table["Scaling Factor"] = 0.0
+    acc1 = _clean_mask(scaling_table["Option"] == 1, scaling_table.index)
+    acc2 = _clean_mask(scaling_table["Option"] == 2, scaling_table.index)
+    acc3 = _clean_mask(scaling_table["Option"] == 3, scaling_table.index)
+    acc4 = _clean_mask(scaling_table["Option"] == 4, scaling_table.index)
+    acc0 = scaling_table.index[
+        _clean_mask(scaling_table["Option"] == 0, scaling_table.index)
     ]
 
-    idx_RPS = df_big.index[df_big["Method"] == "RX power scaling"]
-    scaling_table.loc[idx_RPS, "Option"] = 2
-    scaling_table.loc[idx_RPS, "New Base Unit Value"] = (
-        plant_characteristics["Total Plant Thermal Power (MWt)"]
-        / plant_characteristics["Number of Reactors"]
-    )
-    scaling_table.loc[idx_RPS, "Count per plant"] = plant_characteristics[
-        "Number of Reactors"
-    ]
+    def _power_scale(new_col: str, base_col: str, exp_col: str) -> pd.Series:
+        """(new/base)^exp — base 0 becomes NaN to avoid ZeroDivisionError."""
+        new_v = scaling_table[new_col]
+        base  = scaling_table[base_col].replace(0, np.nan)
+        exp   = scaling_table[exp_col]
+        return (new_v / base) ** exp
+
+    if acc1.any():
+        sf = _power_scale("New Base Unit Value", "EEDB Base Unit Value 1", "Option 1 Exponent")
+        scaling_table.loc[acc1, "Scaling Factor"] = sf.loc[acc1]
+
+    if acc2.any():
+        sf = _power_scale("New Base Unit Value", "EEDB Base Unit Value 2", "Option 2 Exponent")
+        scaling_table.loc[acc2, "Scaling Factor"] = sf.loc[acc2]
+
+    if acc3.any():
+        base3 = scaling_table.loc[acc3, "EEDB Base Unit Value 3"].replace(0, np.nan)
+        scaling_table.loc[acc3, "Scaling Factor"] = (
+            scaling_table.loc[acc3, "New Base Unit Value"] / base3
+        )
 
-    idx_FC = df_big.index[df_big["Method"] == "Fixed cost"]
-    scaling_table.loc[idx_FC, "Option"] = 4
-    scaling_table.loc[idx_FC, "New Base Unit Value"] = 1
+    if acc4.any():
+        scaling_table.loc[acc4, "Scaling Factor"] = 1.0
 
-    idx_D = df_big.index[df_big["Method"] == "Detailed"]
-    scaling_table.loc[idx_D, "Option"] = 1
-    scaling_table.loc[idx_D, "New Base Unit Value"] = df_big.loc[idx_D, "Value"]
-    scaling_table.loc[idx_D, "Count per plant"] = df_big.loc[
-        idx_D, "Count per plant (DI)"
-    ]
+    for account in acc0:
+        if account not in scaling_table.index:
+            print(f"  [WARN] Option-0 account {account!r} not in index — skipping")
+            continue
 
-    idx_Dv = df_big.index[df_big["Method"] == "Detailed volume"]
-    scaling_table.loc[idx_Dv, "Option"] = 1
-    scaling_table.loc[idx_Dv, "New Base Unit Value"] = bv_225
+        raw = scaling_table.loc[account, "Option 0 Formula"]
+        try:
+            varz = (raw if isinstance(raw, list)
+                    else [x.strip()
+                          for x in str(raw).replace("[","").replace("]","").split(",")])
+            varz = [float(x) for x in varz]
+        except (ValueError, AttributeError) as exc:
+            print(f"  [WARN] Cannot parse Option 0 Formula for {account!r}: "
+                  f"{raw!r} — {exc}; Scaling Factor=0")
+            continue
 
-    idx_Dp = df_big.index[df_big["Method"] == "Detailed pool"]
-    scaling_table.loc[idx_Dp, "Option"] = 0
-    scaling_table.loc[idx_Dp, "New Base Unit Value"] = df_big.loc[idx_Dp, "Value"]
-    scaling_table.loc[idx_Dp, "Count per plant"] = df_big.loc[
-        idx_Dp, "Count per plant (DI)"
-    ]
+        new_val = scaling_table.loc[account, "New Base Unit Value"]
+        base3   = scaling_table.loc[account, "EEDB Base Unit Value 3"]
 
-    idx_Dce = df_big.index[df_big["Method"] == "Detailed (CE)"]
-    scaling_table.loc[idx_Dce, "Option"] = 0
-    scaling_table.loc[idx_Dce, "New Base Unit Value"] = df_big.loc[idx_Dce, "Value"]
-    scaling_table.loc[idx_Dce, "Count per plant"] = df_big.loc[
-        idx_Dce, "Count per plant (DI)"
-    ]
+        if _is_nan(base3) or base3 == 0:
+            print(f"  [WARN] EEDB Base Unit Value 3 is 0/NaN for {account!r} — skipping")
+            continue
 
-    idx_DCI = df_big.index[df_big["Method"] == "Direct cost input"]
-    scaling_table.loc[idx_DCI, "Option"] = 3
-    scaling_table.loc[idx_DCI, "New Base Unit Value"] = df_big.loc[
-        idx_DCI, "Direct cost per RX (2018 USD)"
-    ]
-    scaling_table.loc[idx_DCI, "Count per plant"] = df_big.loc[
-        idx_DCI, "Count per plant (DCI)"
-    ]
-
-    idx = df_big.index
-    scaling_table = cost_multipliers(scaling_table, scalars_dict, plant_characteristics)
-    scaling_table = material_use_uncertainty(scaling_table, scalars_dict)
-
-    # --------------------------------------------- Scaling/evaluating new costs -----------------------------------#
-    scaling_table["Scaling Factor"] = 0
-    accounts_0 = scaling_table.index[scaling_table["Option"] == 0]
-    accounts_1 = scaling_table["Option"] == 1
-    accounts_2 = scaling_table["Option"] == 2
-    accounts_3 = scaling_table["Option"] == 3
-    accounts_4 = scaling_table["Option"] == 4
-
-    scaling_table.loc[accounts_1, "Scaling Factor"] = (
-        scaling_table.loc[accounts_1, "New Base Unit Value"]
-        / scaling_table.loc[accounts_1, "EEDB Base Unit Value 1"]
-    ) ** scaling_table.loc[accounts_1, "Option 1 Exponent"]
-    scaling_table.loc[accounts_2, "Scaling Factor"] = (
-        scaling_table.loc[accounts_2, "New Base Unit Value"]
-        / scaling_table.loc[accounts_2, "EEDB Base Unit Value 2"]
-    ) ** scaling_table.loc[accounts_2, "Option 2 Exponent"]
-    scaling_table.loc[accounts_3, "Scaling Factor"] = (
-        scaling_table.loc[accounts_3, "New Base Unit Value"]
-        / scaling_table.loc[accounts_3, "EEDB Base Unit Value 3"]
-    )
-    scaling_table.loc[accounts_4, "Scaling Factor"] = 1.0
-
-    for account in accounts_0:
-        if not isinstance(scaling_table.loc[account, "Option 0 Formula"], list):
-            varz = (
-                scaling_table.loc[account, "Option 0 Formula"]
-                .replace("[", "")
-                .replace("]", "")
-                .split(",")
-            )
-        else:
-            varz = scaling_table.loc[account, "Option 0 Formula"]
-        varz = [float(x) for x in varz]
         if len(varz) == 4:
             scaling_table.loc[account, "Scaling Factor"] = (
-                (
-                    varz[0]
-                    + varz[1]
-                    * scaling_table.loc[account, "New Base Unit Value"] ** varz[2]
-                )
-                * varz[3]
-                / scaling_table.loc[account, "EEDB Base Unit Value 3"]
+                (varz[0] + varz[1] * new_val ** varz[2]) * varz[3] / base3
             )
         elif len(varz) == 1:
-            scaling_table.loc[account, "Scaling Factor"] = (
-                varz[0]
-                * scaling_table.loc[account, "New Base Unit Value"]
-                / scaling_table.loc[account, "EEDB Base Unit Value 3"]
-            )
+            scaling_table.loc[account, "Scaling Factor"] = varz[0] * new_val / base3
+        else:
+            print(f"  [WARN] Option 0 Formula unexpected length {len(varz)} "
+                  f"for {account!r} — skipping")
 
-    # --------------------------------------------- Accounting for # per plant -------------------------------------#
+    # ── Count-per-plant multiplier ────────────────────────────────────────────
     scaling_table["Scaling Factor"] *= scaling_table["Count per plant"]
-    # scaling_table.to_csv('./out/out_'+ fname[10:-5] +'_scaling_table.csv')
-
-    # --------------------------------------------- Add interior concrete to SC1 concrete ------------------------------------#
-    plant_characteristics["sc1_concrete"] += (
-        scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / 1.1 ** 3
-    )  # 8000 CY of interior concrete in EEDB
-    concrete += (
-        scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / 1.1 ** 3
-    )  # 8000 CY of interior concrete in EEDB
-    print("Concrete total: {:.0F}".format(concrete))
-    print("SC1 Concrete: {:.0F}".format(plant_characteristics["sc1_concrete"]))
-    # print(scaling_table.loc['A.212.140', 'Scaling Factor']*8000/1.1**3)
-
-    return scaling_table, plant_characteristics
+
+    # ── Interior concrete correction (A.212.140) ──────────────────────────────
+    if "A.212.140" in scaling_table.index:
+        correction = scaling_table.loc["A.212.140", "Scaling Factor"] * 8000 / (1.1 ** 3)
+        plant_characteristics["sc1_concrete"] += correction
+        concrete += correction
+    else:
+        print("  [WARN] A.212.140 not in scaling_table — concrete correction skipped")
+
+    print(f"Concrete total: {concrete:.0f}")
+    print(f"SC1 Concrete:   {plant_characteristics['sc1_concrete']:.0f}")
+
+    return scaling_table, plant_characteristics
\ No newline at end of file

From 8da2d32c6644955a04a13fe0a3e222d008f74c6a Mon Sep 17 00:00:00 2001
From: Yuri Drobyshev <yuri.drobyshev@humans.net>
Date: Fri, 10 Apr 2026 12:36:53 +0400
Subject: [PATCH 2/3] comment fill_nan

---
 ncet/fill_scaling_table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ncet/fill_scaling_table.py b/ncet/fill_scaling_table.py
index ff77cbb..cb6f733 100644
--- a/ncet/fill_scaling_table.py
+++ b/ncet/fill_scaling_table.py
@@ -166,7 +166,7 @@ def fill_scaling_table(path, fname, base, scalars_dict, scaling_table=None):
         scaling_table.set_index("Account", inplace=True)
 
     # Drop NaN-indexed rows immediately — root cause of the ValueError.
-    scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table")
+    # scaling_table = _fill_nan_index_rows(scaling_table, "scaling_table")  # TODO(review): call disabled, so the note above no longer holds — confirm intent
 
     scaling_table["Option"]                      = 1
     scaling_table["New Base Unit Value"]         = 0.0

From 43984df815a60785d189e4d8cb35f3f230a22430 Mon Sep 17 00:00:00 2001
From: Yuri Drobyshev <yuri.drobyshev@humans.net>
Date: Tue, 12 May 2026 14:46:02 +0300
Subject: [PATCH 3/3] adapted for calculation without turbine

---
 cost_sensitivity.py        | 4 ++--
 ncet/get_indirect_costs.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cost_sensitivity.py b/cost_sensitivity.py
index 9f84061..5afb8f0 100644
--- a/cost_sensitivity.py
+++ b/cost_sensitivity.py
@@ -362,7 +362,7 @@ def run_ncet(
     plant_fname = "inputfile_" + plant + ".xlsx"
 
     param_fname = "input_scaling_exponents.xlsx"
-    orders = 10
+    orders = 1
 
     mc_runs = 1  # choose 1 to run the reference values
     # cProfile.run('run_ncet(plant, path, orders, plant_fname, param_fname, BASIS_FNAME, mc_runs=mc_runs, make_building_table=True, save_all=True)')
@@ -437,4 +437,4 @@ def run_ncet(
 
     #         dfNP.to_csv('./out/' + plant + '/new' + plant + '_' + case + '_' + str(i) + '.csv')
 
-    # scheduler_table.to_csv('./out/' + plant + '/' + plant + '_scheduler_table.csv')
+    # scheduler_table.to_csv('./out/' + plant + '/' + plant + '_scheduler_table.csv')
\ No newline at end of file
diff --git a/ncet/get_indirect_costs.py b/ncet/get_indirect_costs.py
index 96fe032..738f4a4 100644
--- a/ncet/get_indirect_costs.py
+++ b/ncet/get_indirect_costs.py
@@ -158,7 +158,7 @@ def get_indirect_costs(dfNewPlant, plant_characteristics, learning_rate, scalars
     np_indirect_cost["Account Description"] = "Indirect Costs"
     np_indirect_cost["Subcategories"] = 1
 
-    dfNewPlant = dfNewPlant.append(pd.Series(np_indirect_cost, name="A.9"))
+    dfNewPlant = pd.concat([dfNewPlant, pd.Series(np_indirect_cost, name="A.9").to_frame().T.infer_objects().rename_axis(dfNewPlant.index.names)])  # public-API equivalent of the private DataFrame._append
 
     # #----------------- Divide costs among correct overrun accounts ----------------
     # for col in ['Site Labor Hours', 'Site Labor Cost', 'Site Material Cost', 'Factory Equipment Cost']: