From 34e409439dbe9f785884d65a88ed32f5d0f4d617 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Wed, 9 Apr 2025 14:08:08 -0500
Subject: [PATCH 01/20] saving changes

---
 frustratometer/classes/AWSEM.py             | 300 +++++++++++++++-----
 frustratometer/classes/__init__.py          |   2 +-
 frustratometer/optimization/optimization.py | 124 ++------
 3 files changed, 246 insertions(+), 180 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 770f4ad..72f6f9a 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -5,9 +5,9 @@
 from .Gamma import Gamma
 from pydantic import BaseModel, Field, ConfigDict
 from pydantic.types import Path
-from typing import List,Optional,Union
+from typing import List,Optional,Union,Generator
 
-__all__ = ['AWSEM']
+__all__ = ['AWSEM','AWSEMEnsemble']
 
 class AWSEMParameters(BaseModel):
     model_config = ConfigDict(extra='ignore', arbitrary_types_allowed=True)
@@ -75,73 +75,88 @@ def __init__(self,
         AWSEM object
         """
         
-        #Set attributes
-        p = AWSEMParameters(**parameters)
-        if p.min_sequence_separation_contact is None:
-            p.min_sequence_separation_contact = 1
-        if p.min_sequence_separation_rho is None:
-            p.min_sequence_separation_rho = 1
-        if p.min_sequence_separation_electrostatics is None:
-            p.min_sequence_separation_electrostatics = 1
-
-        for field, value in p:
+        #Set parameters attributes
+        self.p = AWSEMParameters(**parameters)
+        if self.p.min_sequence_separation_contact is None:
+            self.p.min_sequence_separation_contact = 1
+        if self.p.min_sequence_separation_rho is None:
+            self.p.min_sequence_separation_rho = 1
+        if self.p.min_sequence_separation_electrostatics is None:
+            self.p.min_sequence_separation_electrostatics = 1
+        for field, value in self.p:
             setattr(self, field, value)
-        
-        #Gamma parameters
-        if isinstance(p.gamma, Gamma):
-            gamma = p.gamma
-        elif isinstance(p.gamma, Path):
-            gamma = Gamma(p.gamma)
+        if isinstance(self.p.gamma, Gamma):
+            gamma = self.p.gamma
+        elif isinstance(self.p.gamma, Path):
+            gamma = Gamma(self.p.gamma)
         else:
             raise ValueError("Gamma parameter must be a path or a Gamma object.")
-                
         self.gamma=gamma
         self.burial_gamma = gamma['Burial'].T
         self.direct_gamma = gamma['Direct'][0]
         self.protein_gamma = gamma['Protein'][0]
         self.water_gamma = gamma['Water'][0]
-        self.burial_in_context=p.burial_in_context
+        self.burial_in_context=self.p.burial_in_context
 
-        #Structure details
-        self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict
+        # ??????
+        self._decoy_fluctuation = {} # don't know what this does
+        self.minimally_frustrated_threshold=.78 # this should be a class variable or an argument to __init__
+
+        # sequence details
         if sequence is None:
             self.sequence=pdb_structure.sequence
         else:
             self.sequence=sequence
+        self.aa_freq = frustration.compute_aa_freq(self.sequence)
+        self.contact_freq = frustration.compute_contact_freq(self.sequence)
+
+        # structure details
+        self.expose_indicator_functions = expose_indicator_functions
+        self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict
+        self.pdb_structure = pdb_structure
+
+
+    @property
+    def pdb_structure(self):
+        return self._pdb_structure
+    @pdb_structure.setter
+    def pdb_structure(self,pdb_structure):
+        # check structure
+        selection_CB = pdb_structure.structure.select('name CB or (resname GLY IGL and name CA)')
+        resid = selection_CB.getResindices()
+        N=len(resid)
+        if N != len(self.sequence):
+            raise ValueError("The pdb is incomplete. Try setting 'repair_pdb=True' when constructing the Structure object.")
+        self.resid = resid
+        self.N = N
+        # set structure-dependent proterties
+        self.pdb_structure  = pdb_structure
         self.structure=pdb_structure.structure
         self.chain=pdb_structure.chain
         self.pdb_file=pdb_structure.pdb_file
         self.init_index_shift=pdb_structure.init_index_shift
         self.distance_matrix=pdb_structure.distance_matrix
         self.full_pdb_distance_matrix=pdb_structure.full_pdb_distance_matrix
-        selection_CB = self.structure.select('name CB or (resname GLY IGL and name CA)')
-
-        resid = selection_CB.getResindices()
-        self.resid=resid
-        self.N=len(self.resid)
-        assert self.N == len(self.sequence), "The pdb is incomplete. Try setting 'repair_pdb=True' when constructing the Structure object."
+        # reset indicator functions, energies, and potts model
+        self.compute_indicators_energies_potts_model()
 
+    def compute_indicators_energies_potts_model():
         if self.burial_in_context==True:
             selected_matrix=self.full_pdb_distance_matrix
         else:
             selected_matrix=self.distance_matrix
         sequence_mask_rho = frustration.compute_mask(selected_matrix, 
                                                      maximum_contact_distance=None, 
-                                                     minimum_sequence_separation = p.min_sequence_separation_rho)
+                                                     minimum_sequence_separation = self.p.min_sequence_separation_rho)
         sequence_mask_contact = frustration.compute_mask(self.distance_matrix, 
-                                                     maximum_contact_distance=p.distance_cutoff_contact, 
-                                                     minimum_sequence_separation = p.min_sequence_separation_contact)
-        
-        self._decoy_fluctuation = {}
-        self.minimally_frustrated_threshold=.78
-
+                                                     maximum_contact_distance=self.p.distance_cutoff_contact, 
+                                                     minimum_sequence_separation = self.p.min_sequence_separation_contact)
         # Calculate rho
         rho = 0.25 
-        rho *= (1 + np.tanh(p.eta * (selected_matrix- p.r_min)))
-        rho *= (1 + np.tanh(p.eta * (p.r_max - selected_matrix)))
+        rho *= (1 + np.tanh(self.p.eta * (selected_matrix- self.p.r_min)))
+        rho *= (1 + np.tanh(self.p.eta * (self.p.r_max - selected_matrix)))
         rho *= sequence_mask_rho
         self.rho=rho
-        
         #Calculate sigma water
         rho_r = (rho).sum(axis=1)
         if self.full_pdb_distance_matrix.shape!=self.distance_matrix.shape:
@@ -153,23 +168,21 @@ def __init__(self,
         rho_b = np.expand_dims(rho_r, 1)
         rho1 = np.expand_dims(rho_r, 0)
         rho2 = np.expand_dims(rho_r, 1)
-        sigma_water = 0.25 * (1 - np.tanh(p.eta_sigma * (rho1 - p.rho_0))) * (1 - np.tanh(p.eta_sigma * (rho2 - p.rho_0)))
+        sigma_water = 0.25 * (1 - np.tanh(self.p.eta_sigma * (rho1 - self.p.rho_0))) * (1 - np.tanh(self.p.eta_sigma * (rho2 - self.p.rho_0)))
         sigma_protein = 1 - sigma_water
-
         #Calculate theta and indicators
-        theta = 0.25 * (1 + np.tanh(p.eta * (self.distance_matrix - p.r_min))) * (1 + np.tanh(p.eta * (p.r_max - self.distance_matrix)))
-        thetaII = 0.25 * (1 + np.tanh(p.eta * (self.distance_matrix - p.r_minII))) * (1 + np.tanh(p.eta * (p.r_maxII - self.distance_matrix)))
-        burial_indicator = np.tanh(p.burial_kappa * (rho_b - p.burial_ro_min)) + np.tanh(p.burial_kappa * (p.burial_ro_max - rho_b))
+        theta = 0.25 * (1 + np.tanh(self.p.eta * (self.distance_matrix - self.p.r_min))) * (1 + np.tanh(self.p.eta * (self.p.r_max - self.distance_matrix)))
+        thetaII = 0.25 * (1 + np.tanh(self.p.eta * (self.distance_matrix - self.p.r_minII))) * (1 + np.tanh(self.p.eta * (self.p.r_maxII - self.distance_matrix)))
+        burial_indicator = np.tanh(self.p.burial_kappa * (rho_b - self.p.burial_ro_min)) + np.tanh(self.p.burial_kappa * (self.p.burial_ro_max - rho_b))
         direct_indicator = theta[:, :, np.newaxis, np.newaxis]
         water_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_water[:, :, np.newaxis, np.newaxis]
         protein_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_protein[:, :, np.newaxis, np.newaxis]
-        
-        if expose_indicator_functions:
+        # store indicators and gammas for our particular sequence as attributes
+        if self.expose_indicator_functions:
             self.indicators=[]
             self.indicators.append(burial_indicator[:,0])
             self.indicators.append(burial_indicator[:,1])
             self.indicators.append(burial_indicator[:,2])
-            
             self.indicators.append(direct_indicator[:,:,0,0]*sequence_mask_contact)
             self.indicators.append(protein_indicator[:,:,0,0]*sequence_mask_contact)
             self.indicators.append(water_indicator[:,:,0,0]*sequence_mask_contact)
@@ -194,60 +207,52 @@ def __init__(self,
             self.water_indicator = water_indicator
             self.protein_indicator = protein_indicator
             
-
+        
         J_index = np.meshgrid(range(self.N), range(self.N), range(self.q), range(self.q), indexing='ij', sparse=False)
         h_index = np.meshgrid(range(self.N), range(self.q), indexing='ij', sparse=False)
-
-        #Burial energy
-        burial_energy = 0.5 * p.k_contact * self.burial_gamma[h_index[1]] * burial_indicator[:, np.newaxis, :]
-        self.burial_energy = burial_energy
-
-        #Contact energy
+        
+        # compute burial and contact energies
+        self.burial_energy = 0.5 * p.k_contact * self.burial_gamma[h_index[1]] * burial_indicator[:, np.newaxis, :]
         direct = direct_indicator * self.direct_gamma[J_index[2], J_index[3]]
         water_mediated = water_indicator * self.water_gamma[J_index[2], J_index[3]]
         protein_mediated = protein_indicator  * self.protein_gamma[J_index[2], J_index[3]]
-        contact_energy = p.k_contact * np.array([direct, water_mediated, protein_mediated]) * sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
-
-        # Compute electrostatics
-        if p.k_electrostatics!=0:
-            self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, p.min_sequence_separation_contact)
+        self.contact_energy = self.p.k_contact * np.array([direct, water_mediated, protein_mediated]) * sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
+        # Compute electrostatics and add to contact energy
+        if self.p.k_electrostatics!=0:
+            self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
             self.distance_cutoff=None
-            
-            
-            electrostatics_mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=p.min_sequence_separation_electrostatics)
+            electrostatics_mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=self.p.min_sequence_separation_electrostatics)
             # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
             charges = np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
             charges2 = charges[:,np.newaxis]*charges[np.newaxis,:]
-
-            electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / p.electrostatics_screening_length) * electrostatics_mask
-            electrostatics_energy = -p.k_electrostatics * (charges2[np.newaxis,np.newaxis,:,:]*electrostatics_indicator[:,:,np.newaxis,np.newaxis])
-
+            electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / self.p.electrostatics_screening_length) * electrostatics_mask
+            electrostatics_energy = -self.p.k_electrostatics * (charges2[np.newaxis,np.newaxis,:,:]*electrostatics_indicator[:,:,np.newaxis,np.newaxis])
             contact_energy = np.append(contact_energy, electrostatics_energy[np.newaxis,:,:,:,:], axis=0)
-            if expose_indicator_functions:
+            if self.expose_indicator_functions:
                 self.indicators.append(electrostatics_indicator)
-                temp_gamma=0.5 * p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
+                temp_gamma=0.5 * self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
                 temp_gamma[0,:]=0
                 temp_gamma[:,0]=0
                 self.gamma_array.append(temp_gamma)
         else:
-            self.sequence_cutoff=p.min_sequence_separation_contact
-            self.distance_cutoff=p.distance_cutoff_contact
+            self.sequence_cutoff=self.p.min_sequence_separation_contact
+            self.distance_cutoff=self.p.distance_cutoff_contact
         self.mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=self.distance_cutoff, minimum_sequence_separation = self.sequence_cutoff)
-
         self.contact_energy = contact_energy
 
-        # Compute fast properties
-        self.aa_freq = frustration.compute_aa_freq(self.sequence)
-        self.contact_freq = frustration.compute_contact_freq(self.sequence)
+        # Compute potts model
         self.potts_model = {}
         self.potts_model['h'] = burial_energy.sum(axis=-1)[:, self.aa_map_awsem_list]
         self.potts_model['J'] = contact_energy.sum(axis=0)[:, :, self.aa_map_awsem_x, self.aa_map_awsem_y]
-        
         # Set the gap energy to zero
         self.potts_model['h'][:, 0] = 0
         self.potts_model['J'][:, :, 0, :] = 0
         self.potts_model['J'][:, :, :, 0] = 0
-        self._native_energy=None
+        self._native_energy=None # don't know what this does
+
+    def change_conformation(alternative_pdb_structure):
+        # this function is an alias for the pdb_structure setter
+        self.pdb_structure = alternative_pdb_structure
 
     def compute_configurational_decoy_statistics(self, n_decoys=4000,aa_freq=None):
         # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
@@ -366,4 +371,147 @@ def compute_configurational_energies(self):
     
     def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
         mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
-        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
\ No newline at end of file
+        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
+
+
+class AWSEMEnsemble(): # don't think it's necessary for this one to inherit from Frustratometer
+                       # also, note that the functions compute_configurational_decoy_statistics,
+                       # compute_configurational_energies, and configuration_frustration are
+                       # present in the AWSEM class but removed here, since we don't expect to 
+                       # compute frustration on an entire ensemble
+    #Mapping to DCA
+    q = 20
+    aa_map_awsem_list = [0, 0, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 18] #A gap has no energy
+    aa_map_awsem_x, aa_map_awsem_y = np.meshgrid(aa_map_awsem_list, aa_map_awsem_list, indexing='ij')
+    
+    def __init__(self, 
+                 pdb_structures: Generator[object,None,None],
+                 **parameters)->object:
+        """
+        Generate AWSEMEnsemble object
+
+        Parameters
+        ----------
+        pdb_structures : Generator[object,None,None]
+            yields Structure objects representing decoy structures
+        
+        Returns
+        -------
+        AWSEMEnsemble object
+        """
+        
+        #Set attributes
+        p = AWSEMParameters(**parameters)
+        if p.min_sequence_separation_contact is None:
+            p.min_sequence_separation_contact = 1
+        if p.min_sequence_separation_rho is None:
+            p.min_sequence_separation_rho = 1
+        if p.min_sequence_separation_electrostatics is None:
+            p.min_sequence_separation_electrostatics = 1
+
+        for field, value in p:
+            setattr(self, field, value)
+        
+        #Gamma parameters
+        if isinstance(p.gamma, Gamma):
+            gamma = p.gamma
+        elif isinstance(p.gamma, Path):
+            gamma = Gamma(p.gamma)
+        else:
+            raise ValueError("Gamma parameter must be a path or a Gamma object.")
+                
+        self.gamma=gamma
+        self.burial_gamma = gamma['Burial'].T
+        self.direct_gamma = gamma['Direct'][0]
+        self.protein_gamma = gamma['Protein'][0]
+        self.water_gamma = gamma['Water'][0]
+        self.burial_in_context=p.burial_in_context # need to be careful here--the same choice will have to apply to all structures,
+                                              # the way this code is currently written
+        self.indicators = [] # we're always going to expose indicator functions for this class
+        #self._decoy_fluctuation = {} # not sure what this does
+        if p.k_electrostatics!=0:
+            self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, p.min_sequence_separation_contact)
+            self.distance_cutoff=None
+        else:
+            self.sequence_cutoff=p.min_sequence_separation_contact
+            self.distance_cutoff=p.distance_cutoff_contact
+       
+        Ns = [] # number of residues in each structure
+        for pdb_structure in pdb_structures: 
+            self.indicators.append(AWSEM(pdb_structure,expose_indicator_functions=True).indicators)
+            """
+            #Structure details
+            # we can exclude most of the details present in the AWSEM class
+            structure=pdb_structure.structure
+            init_index_shift=pdb_structure.init_index_shift
+            distance_matrix=pdb_structure.distance_matrix
+            full_pdb_distance_matrix=pdb_structure.full_pdb_distance_matrix
+            selection_CB = structure.select('name CB or (resname GLY IGL and name CA)')
+
+            resid = selection_CB.getResindices()
+            N = len(resid)
+            Ns.append(N) # we'll check this later do make sure every structure has the same number of residues
+
+            if self.burial_in_context==True:
+                selected_matrix=full_pdb_distance_matrix # use a matrix that includes extra residues to compute local density and contacts
+                                                         # like the case where we're trying to design a protein that binds 
+                                                         # to another protein, and those residues affect the local environment even if they're
+                                                         # not part of the sequence space that we're sampling
+            else:
+                selected_matrix=distance_matrix
+            sequence_mask_rho = frustration.compute_mask(selected_matrix, 
+                                                        maximum_contact_distance=None, 
+                                                        minimum_sequence_separation = p.min_sequence_separation_rho)
+            sequence_mask_contact = frustration.compute_mask(distance_matrix, 
+                                                        maximum_contact_distance=p.distance_cutoff_contact, 
+                                                        minimum_sequence_separation = p.min_sequence_separation_contact)
+
+            # Calculate rho
+            rho = 0.25 
+            rho *= (1 + np.tanh(p.eta * (selected_matrix- p.r_min)))
+            rho *= (1 + np.tanh(p.eta * (p.r_max - selected_matrix)))
+            rho *= sequence_mask_rho
+            
+            #Calculate sigma water
+            rho_r = (rho).sum(axis=1)
+            if full_pdb_distance_matrix.shape!=distance_matrix.shape:
+                if self.burial_in_context==True:
+                    init_index_shift=pdb_structure.init_index_shift
+                    fin_index_shift=pdb_structure.fin_index_shift
+                    rho_r=rho_r[init_index_shift:fin_index_shift]
+            rho_b = np.expand_dims(rho_r, 1)
+            rho1 = np.expand_dims(rho_r, 0)
+            rho2 = np.expand_dims(rho_r, 1)
+            sigma_water = 0.25 * (1 - np.tanh(p.eta_sigma * (rho1 - p.rho_0))) * (1 - np.tanh(p.eta_sigma * (rho2 - p.rho_0)))
+            sigma_protein = 1 - sigma_water
+
+            #Calculate theta and indicators
+            theta = 0.25 * (1 + np.tanh(p.eta * (distance_matrix - p.r_min))) * (1 + np.tanh(p.eta * (p.r_max - distance_matrix)))
+            thetaII = 0.25 * (1 + np.tanh(p.eta * (distance_matrix - p.r_minII))) * (1 + np.tanh(p.eta * (p.r_maxII - distance_matrix)))
+            burial_indicator = np.tanh(p.burial_kappa * (rho_b - p.burial_ro_min)) + np.tanh(p.burial_kappa * (p.burial_ro_max - rho_b))
+            direct_indicator = theta[:, :, np.newaxis, np.newaxis]
+            water_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_water[:, :, np.newaxis, np.newaxis]
+            protein_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_protein[:, :, np.newaxis, np.newaxis]
+            
+            self.indicators.append([])
+            self.indicators[-1].append(burial_indicator[:,0])
+            self.indicators[-1].append(burial_indicator[:,1])
+            self.indicators[-1].append(burial_indicator[:,2])
+            self.indicators[-1].append(direct_indicator[:,:,0,0]*sequence_mask_contact)
+            self.indicators[-1].append(protein_indicator[:,:,0,0]*sequence_mask_contact)
+            self.indicators[-1].append(water_indicator[:,:,0,0]*sequence_mask_contact)
+
+            # Compute electrostatics
+            if p.k_electrostatics!=0:
+                electrostatics_mask = frustration.compute_mask(distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=p.min_sequence_separation_electrostatics)
+                # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
+                charges = np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
+                charges2 = charges[:,np.newaxis]*charges[np.newaxis,:]
+                electrostatics_indicator = 1 / (distance_matrix + 1E-6) * np.exp(-distance_matrix / p.electrostatics_screening_length) * electrostatics_mask
+                self.indicators[-1].append(electrostatics_indicator)
+            """
+
+        self._native_energy=None # not sure what this does
+
+        #assert len(list(set(Ns))) == 1, f"Not all structures had the same number of residues! Numbers of residues found were {set(Ns)}"
+        #self.N = Ns[0] # doesn't matter which one we choose, they're all the same if we passed the assert
diff --git a/frustratometer/classes/__init__.py b/frustratometer/classes/__init__.py
index 6621727..70dec2d 100644
--- a/frustratometer/classes/__init__.py
+++ b/frustratometer/classes/__init__.py
@@ -7,7 +7,7 @@
 """
 
 from .DCA import DCA
-from .AWSEM import AWSEM
+from .AWSEM import AWSEM, AWSEMEnsemble
 from .Structure import Structure
 from .Map import Map
 from .Gamma import Gamma
diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index 0bdc5e5..3ade9f9 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -7,7 +7,7 @@
 
 from frustratometer.classes import Frustratometer
 from frustratometer.classes import Structure
-from frustratometer.classes import AWSEM
+from frustratometer.classes import AWSEM, AWSEMEnsemble
 from frustratometer.optimization.EnergyTerm import EnergyTerm
 from frustratometer.optimization.inner_product import compute_all_region_means
 from frustratometer.optimization.inner_product import build_mean_inner_product_matrix
@@ -1109,111 +1109,29 @@ def find_optimal_replicas(self, max_replicas=32, n_repeats=5, n_steps=10000):
 
 if __name__ == '__main__':
 
-    native_pdb = "tests/data/1r69.pdb"
-    
-    structure_bound = Structure.full_pdb(native_pdb, chain=None)
-    structure_free = Structure.full_pdb(native_pdb, "A")
-
-    model_bound = AWSEM(structure_bound, distance_cutoff_contact=10, min_sequence_separation_contact=2, expose_indicator_functions=True)
-    model_free = AWSEM(structure_free, distance_cutoff_contact=10, min_sequence_separation_contact=2, expose_indicator_functions=True)
-    reduced_alphabet = 'ADEFHIKLMNQRSTVWY'
-
-    print(model_bound.sequence)
-    print(model_free.sequence)
-
-    # binding_region=np.array([1, 2, 3, 4, 26, 27, 28, 29, 30, 31, 32, 33, 49, 50, 51, 52, 53, 54, 55, 56, 57, 68, 69, 70, 90, 91, 92, 93, 94, 95, 96, 97, 109, 110, 111, 112, 113, 114, 115, 116, 117, 127, 128, 129, 130, 131, 132, 133, 134, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 190, 191, 192, 193, 194, 195, 196, 197])-1
-    energy_bound = AwsemEnergySelected(model_bound, alphabet=reduced_alphabet, selection=np.array(range(len(model_free.sequence))))
-    energy_free = AwsemEnergySelected(model_free, alphabet=reduced_alphabet)
-    energy_average = AwsemEnergyAverage(model_free, alphabet=reduced_alphabet)
-    energy_std = AwsemEnergyStd(model_free, alphabet=reduced_alphabet)
-    energy_std_100 = AwsemEnergyStd(model_free, alphabet=reduced_alphabet, n_decoys=100)
-    energy_std_1000 = AwsemEnergyStd(model_free, alphabet=reduced_alphabet, n_decoys=1000)
-    energy_std_10000 = AwsemEnergyStd(model_free, alphabet=reduced_alphabet, n_decoys=10000)
-    energy_variance = AwsemEnergyVariance(model_free, alphabet=reduced_alphabet)
+    pdb_list = ["tests/data/1r69.pdb"]
+    pdb_structures = (Structure(pdb, chain=None) for pdb in pdb_list)
+    ensemble = AWSEMEnsemble(pdb_structures, distance_cutoff_contact=10, min_sequence_separation_contact=2, expose_indicator_functions=True)
+    ###########################################################
+    # temporary changes for testing it while it's not yet fully implemented
+    assert len(ensemble.indicators)==1, ensemble.indicators
+    #assert len(ensemble.potts_model['h'])==1, ensemble.potts_model['h']
+    #assert len(ensemble.potts_model['J'])==1, ensemble.potts_model['J']
+    ensemble.indicators = ensemble.indicators[0]
+    #ensemble.potts_model['h'] = ensemble.potts_model['h'][0]
+    #ensemble.potts_model['J'] = ensemble.potts_model['J'][0]
+    ensemble.mask = np.ones((63,63))
+    #########################################################
+
+    reduced_alphabet = 'ADEFGHIKLMNQRSTVWY'
+
+    awsem_energy = AwsemEnergy(ensemble, alphabet=reduced_alphabet)
     heterogeneity = Heterogeneity(exact=False, use_numba=True)
-    similarity = Similarity(model_free.sequence, use_numba=True)
-
-    # energy_mix = energy_free - 20 * heterogeneity
-    energy_mix = (energy_free - energy_average) / energy_std
-    # energy_mix = energy_bound - energy_free
-
-    energy_mixes = {"EnergyFree": energy_free,
-                    "EnergyBound": energy_bound,
-                    "Heterogeneity": heterogeneity,
-                    "EnergyAverage": energy_average, 
-                    "EnergyStd_ndecoys100": energy_std_100,
-                    "EnergyStd_ndecoys1000": energy_std_1000,
-                    "EnergyStd_ndecoys10000": energy_std_10000,
-                    "EnergyStd": energy_std,
-                    "Zscore_ndecoys10000":(energy_free - energy_average) / energy_std_10000,
-                    "Zscore":(energy_free - energy_average) / energy_std,
-                    "EnergyVariance": energy_variance,
-                    "Binding": (energy_bound - energy_free),
-                    "Similarity": similarity, 
-                    "Ivan":energy_bound - 40 * heterogeneity, 
-                    "Takada": (energy_bound - energy_average) / energy_std, 
-                    "Ivan_binding":(energy_bound - energy_free) - 40 * heterogeneity,
-                    "Takada_binding":(energy_free - energy_average) / energy_std + (energy_bound - energy_free),
-                    "Ivan_Takada_binding": (energy_free - energy_average) / energy_std + (energy_bound - energy_free) - 40 * heterogeneity,
-                    "Corrected_Takada": (energy_bound - energy_average) / (energy_std+5), 
-                    "Corrected_Takada_binding":(energy_free - energy_average) / (energy_std+5) + (energy_bound - energy_free),
-                    "Ivan_Corrected_Takada_binding": (energy_free - energy_average) / (energy_std+5) + (energy_bound - energy_free) - 40 * heterogeneity,
-                    "Ivan_bindidng similarity": (energy_bound - energy_free) - 40 * heterogeneity - 100*similarity,
-                    "Corrected_Takada_binding_similarity":(energy_free - energy_average) / (energy_std+5) + (energy_bound - energy_free) - 100*similarity,
-                    "Ivan_bindidng_similarityv2": (energy_bound - energy_free) - 40 * heterogeneity}
-    
-    for energy_name,energy_term in energy_mixes.items():
-        print (f"Energy term: {energy_name}")
-        energy_term.benchmark(seq_indices=np.random.randint(0, len(reduced_alphabet), size=(100,len(structure_free.sequence))))
-        if "ndecoys" not in energy_name:
-            energy_term.test(seq_index=np.random.randint(0, len(reduced_alphabet), size=len(structure_free.sequence)))
-        
-        monte_carlo = MonteCarlo(sequence = structure_free.sequence, energy=energy_term, alphabet=reduced_alphabet)
-        monte_carlo.benchmark_montecarlo_steps(n_repeats=3,n_steps=10000)
-        monte_carlo.benchmark_parallel_montecarlo_steps(n_repeats=3, n_steps=10000, n_replicas=8)
-
-
-
-    # Profiling of the parallel tempering
-    import cProfile
-    import pstats
-    import io
 
-    monte_carlo = MonteCarlo(sequence=model_free.sequence,  energy=energy_mix, alphabet=reduced_alphabet)
-                                    # evaluation_energies={"EnergyFree": energy_free, "Heterogeneity": heterogeneity, 
-                                    #                     "EnergyAverage": energy_average, "EnergyStd": energy_std,
-                                    #                     "Similarity": similarity, "Zscore":(energy_free - energy_average) / energy_std})
-            
-    monte_carlo.benchmark_montecarlo_steps(n_repeats=3, n_steps=100)
-    for n_replicas in [1, 2, 4, 8, 16]:
-        print(f"Running parallel tempering with {n_replicas} replicas")
-        monte_carlo.benchmark_parallel_montecarlo_steps(n_repeats=3, n_steps=100, n_replicas=n_replicas)
-
-    for n_replicas in [1, 2, 4, 8, 16]:
-        print(f"Running parallel tempering with {n_replicas} replicas")
-        monte_carlo.benchmark_parallel_montecarlo_steps(n_repeats=3, n_steps=1000, n_replicas=n_replicas)
-
-    monte_carlo.find_optimal_replicas(max_replicas=32, n_repeats=5, n_steps=1000)
-    monte_carlo.find_optimal_replicas(max_replicas=8, n_repeats=5, n_steps=10000)
-    monte_carlo.find_optimal_replicas(max_replicas=8, n_repeats=5, n_steps=100000)
-
-    # # Run the profiler
-    # profiler = cProfile.Profile()
-    # profiler.enable()
-
-    # monte_carlo.parallel_tempering(temperatures=np.logspace(3,-4,8), n_steps=1E4, n_steps_per_cycle=1E2)
-    # profiler.disable()
-    
-    # # Print the stats
-    # s = io.StringIO()
-    # ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative')
-    # ps.print_stats()
-    # ps.dump_stats('parallel_temperingv2.prof')
-    
-    
-    
-    
+    energy_mix = awsem_energy - 10*heterogeneity
     
+    monte_carlo = MonteCarlo(sequence = "SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLNGT", energy=energy_mix, alphabet=reduced_alphabet)
+    monte_carlo.annealing(n_steps=1000)
     
     
     

From fa23e340479b01d49e51c36e44d1237eb1f13314 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Tue, 8 Jul 2025 17:16:00 -0500
Subject: [PATCH 02/20] fixed typo

---
 frustratometer/classes/AWSEM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 72f6f9a..15b103d 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -130,7 +130,7 @@ def pdb_structure(self,pdb_structure):
         self.resid = resid
         self.N = N
         # set structure-dependent proterties
-        self.pdb_structure  = pdb_structure
+        self._pdb_structure  = pdb_structure
         self.structure=pdb_structure.structure
         self.chain=pdb_structure.chain
         self.pdb_file=pdb_structure.pdb_file

From fce6e82744a21d05a8afc6b9d3ba5ab75a93facb Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Tue, 8 Jul 2025 17:34:39 -0500
Subject: [PATCH 03/20] added missing 'self' references

---
 frustratometer/classes/AWSEM.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 15b103d..b84513d 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -140,7 +140,7 @@ def pdb_structure(self,pdb_structure):
         # reset indicator functions, energies, and potts model
         self.compute_indicators_energies_potts_model()
 
-    def compute_indicators_energies_potts_model():
+    def compute_indicators_energies_potts_model(self):
         if self.burial_in_context==True:
             selected_matrix=self.full_pdb_distance_matrix
         else:
@@ -190,7 +190,7 @@ def compute_indicators_energies_potts_model():
             self.gamma_array=[]
             temp_burial_gamma=self.burial_gamma[self.aa_map_awsem_list]
             temp_burial_gamma[0]=0
-            temp_burial_gamma *= -0.5 * p.k_contact
+            temp_burial_gamma *= -0.5 * self.p.k_contact
             self.gamma_array.append(temp_burial_gamma[:,0])
             self.gamma_array.append(temp_burial_gamma[:,1])
             self.gamma_array.append(temp_burial_gamma[:,2])
@@ -212,14 +212,14 @@ def compute_indicators_energies_potts_model():
         h_index = np.meshgrid(range(self.N), range(self.q), indexing='ij', sparse=False)
         
         # compute burial and contact energies
-        self.burial_energy = 0.5 * p.k_contact * self.burial_gamma[h_index[1]] * burial_indicator[:, np.newaxis, :]
+        self.burial_energy = 0.5 * self.p.k_contact * self.burial_gamma[h_index[1]] * burial_indicator[:, np.newaxis, :]
         direct = direct_indicator * self.direct_gamma[J_index[2], J_index[3]]
         water_mediated = water_indicator * self.water_gamma[J_index[2], J_index[3]]
         protein_mediated = protein_indicator  * self.protein_gamma[J_index[2], J_index[3]]
-        self.contact_energy = self.p.k_contact * np.array([direct, water_mediated, protein_mediated]) * sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
+        contact_energy = self.p.k_contact * np.array([direct, water_mediated, protein_mediated]) * sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
         # Compute electrostatics and add to contact energy
         if self.p.k_electrostatics!=0:
-            self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
+            self.sequence_cutoff=min(self.p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
             self.distance_cutoff=None
             electrostatics_mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=self.p.min_sequence_separation_electrostatics)
             # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
@@ -242,8 +242,8 @@ def compute_indicators_energies_potts_model():
 
         # Compute potts model
         self.potts_model = {}
-        self.potts_model['h'] = burial_energy.sum(axis=-1)[:, self.aa_map_awsem_list]
-        self.potts_model['J'] = contact_energy.sum(axis=0)[:, :, self.aa_map_awsem_x, self.aa_map_awsem_y]
+        self.potts_model['h'] = self.burial_energy.sum(axis=-1)[:, self.aa_map_awsem_list]
+        self.potts_model['J'] = self.contact_energy.sum(axis=0)[:, :, self.aa_map_awsem_x, self.aa_map_awsem_y]
         # Set the gap energy to zero
         self.potts_model['h'][:, 0] = 0
         self.potts_model['J'][:, :, 0, :] = 0

From 74839593d908ddd36ceb92a13d51adcba961f6f8 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Tue, 8 Jul 2025 20:54:21 -0500
Subject: [PATCH 04/20] refactored; basic setup of decoy ensemble is working
 now

---
 frustratometer/classes/AWSEM.py             | 108 +++++---------------
 frustratometer/optimization/optimization.py |   7 --
 2 files changed, 27 insertions(+), 88 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index b84513d..7c94570 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -115,7 +115,8 @@ def __init__(self,
         self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict
         self.pdb_structure = pdb_structure
 
-
+    
+    # allows us to update coordinates only by passing in a pdb file
     @property
     def pdb_structure(self):
         return self._pdb_structure
@@ -397,7 +398,8 @@ def __init__(self,
         
         Returns
         -------
-        AWSEMEnsemble object
+        AWSEMEnsemble object, which holds a list of indicator functions for each decoy structure,
+            as calculated by the AWSEM class. 
         """
         
         #Set attributes
@@ -427,7 +429,6 @@ def __init__(self,
         self.water_gamma = gamma['Water'][0]
         self.burial_in_context=p.burial_in_context # need to be careful here--the same choice will have to apply to all structures,
                                               # the way this code is currently written
-        self.indicators = [] # we're always going to expose indicator functions for this class
         #self._decoy_fluctuation = {} # not sure what this does
         if p.k_electrostatics!=0:
             self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, p.min_sequence_separation_contact)
@@ -436,82 +437,27 @@ def __init__(self,
             self.sequence_cutoff=p.min_sequence_separation_contact
             self.distance_cutoff=p.distance_cutoff_contact
        
-        Ns = [] # number of residues in each structure
+        burial_low_density_indicators = []
+        burial_medium_density_indicators = []   
+        burial_high_density_indicators = []
+        contact_direct_indicators = []
+        contact_protein_indicators = []
+        contact_water_indicators = []
+        electrostatics_indicators = []
         for pdb_structure in pdb_structures: 
-            self.indicators.append(AWSEM(pdb_structure,expose_indicator_functions=True).indicators)
-            """
-            #Structure details
-            # we can exclude most of the details present in the AWSEM class
-            structure=pdb_structure.structure
-            init_index_shift=pdb_structure.init_index_shift
-            distance_matrix=pdb_structure.distance_matrix
-            full_pdb_distance_matrix=pdb_structure.full_pdb_distance_matrix
-            selection_CB = structure.select('name CB or (resname GLY IGL and name CA)')
-
-            resid = selection_CB.getResindices()
-            N = len(resid)
-            Ns.append(N) # we'll check this later do make sure every structure has the same number of residues
-
-            if self.burial_in_context==True:
-                selected_matrix=full_pdb_distance_matrix # use a matrix that includes extra residues to compute local density and contacts
-                                                         # like the case where we're trying to design a protein that binds 
-                                                         # to another protein, and those residues affect the local environment even if they're
-                                                         # not part of the sequence space that we're sampling
-            else:
-                selected_matrix=distance_matrix
-            sequence_mask_rho = frustration.compute_mask(selected_matrix, 
-                                                        maximum_contact_distance=None, 
-                                                        minimum_sequence_separation = p.min_sequence_separation_rho)
-            sequence_mask_contact = frustration.compute_mask(distance_matrix, 
-                                                        maximum_contact_distance=p.distance_cutoff_contact, 
-                                                        minimum_sequence_separation = p.min_sequence_separation_contact)
-
-            # Calculate rho
-            rho = 0.25 
-            rho *= (1 + np.tanh(p.eta * (selected_matrix- p.r_min)))
-            rho *= (1 + np.tanh(p.eta * (p.r_max - selected_matrix)))
-            rho *= sequence_mask_rho
-            
-            #Calculate sigma water
-            rho_r = (rho).sum(axis=1)
-            if full_pdb_distance_matrix.shape!=distance_matrix.shape:
-                if self.burial_in_context==True:
-                    init_index_shift=pdb_structure.init_index_shift
-                    fin_index_shift=pdb_structure.fin_index_shift
-                    rho_r=rho_r[init_index_shift:fin_index_shift]
-            rho_b = np.expand_dims(rho_r, 1)
-            rho1 = np.expand_dims(rho_r, 0)
-            rho2 = np.expand_dims(rho_r, 1)
-            sigma_water = 0.25 * (1 - np.tanh(p.eta_sigma * (rho1 - p.rho_0))) * (1 - np.tanh(p.eta_sigma * (rho2 - p.rho_0)))
-            sigma_protein = 1 - sigma_water
-
-            #Calculate theta and indicators
-            theta = 0.25 * (1 + np.tanh(p.eta * (distance_matrix - p.r_min))) * (1 + np.tanh(p.eta * (p.r_max - distance_matrix)))
-            thetaII = 0.25 * (1 + np.tanh(p.eta * (distance_matrix - p.r_minII))) * (1 + np.tanh(p.eta * (p.r_maxII - distance_matrix)))
-            burial_indicator = np.tanh(p.burial_kappa * (rho_b - p.burial_ro_min)) + np.tanh(p.burial_kappa * (p.burial_ro_max - rho_b))
-            direct_indicator = theta[:, :, np.newaxis, np.newaxis]
-            water_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_water[:, :, np.newaxis, np.newaxis]
-            protein_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_protein[:, :, np.newaxis, np.newaxis]
-            
-            self.indicators.append([])
-            self.indicators[-1].append(burial_indicator[:,0])
-            self.indicators[-1].append(burial_indicator[:,1])
-            self.indicators[-1].append(burial_indicator[:,2])
-            self.indicators[-1].append(direct_indicator[:,:,0,0]*sequence_mask_contact)
-            self.indicators[-1].append(protein_indicator[:,:,0,0]*sequence_mask_contact)
-            self.indicators[-1].append(water_indicator[:,:,0,0]*sequence_mask_contact)
-
-            # Compute electrostatics
-            if p.k_electrostatics!=0:
-                electrostatics_mask = frustration.compute_mask(distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=p.min_sequence_separation_electrostatics)
-                # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
-                charges = np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
-                charges2 = charges[:,np.newaxis]*charges[np.newaxis,:]
-                electrostatics_indicator = 1 / (distance_matrix + 1E-6) * np.exp(-distance_matrix / p.electrostatics_screening_length) * electrostatics_mask
-                self.indicators[-1].append(electrostatics_indicator)
-            """
-
-        self._native_energy=None # not sure what this does
-
-        #assert len(list(set(Ns))) == 1, f"Not all structures had the same number of residues! Numbers of residues found were {set(Ns)}"
-        #self.N = Ns[0] # doesn't matter which one we choose, they're all the same if we passed the assert
+            all_indicators = AWSEM(pdb_structure,expose_indicator_functions=True).indicators
+            burial_low_density_indicators.append(all_indicators[0])
+            burial_medium_density_indicators.append(all_indicators[1]) 
+            burial_high_density_indicators.append(all_indicators[2])
+            contact_direct_indicators.append(all_indicators[3])
+            contact_protein_indicators.append(all_indicators[4])
+            contact_water_indicators.append(all_indicators[5])
+            electrostatics_indicators.append(all_indicators[6])
+            # indicator function order: burial low density, burial medium density, burial high density,
+            #                           direct, protein, water, electrostatics
+        self.indicators = [np.array(burial_low_density_indicators),
+                           np.array(burial_medium_density_indicators),
+                           np.array(contact_direct_indicators),
+                           np.array(contact_protein_indicators),
+                           np.array(contact_water_indicators),
+                           np.array(electrostatics_indicators),]
diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index 3ade9f9..2ecf44c 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -1113,13 +1113,6 @@ def find_optimal_replicas(self, max_replicas=32, n_repeats=5, n_steps=10000):
     pdb_structures = (Structure(pdb, chain=None) for pdb in pdb_list)
     ensemble = AWSEMEnsemble(pdb_structures, distance_cutoff_contact=10, min_sequence_separation_contact=2, expose_indicator_functions=True)
     ###########################################################
-    # temporary changes for testing it while it's not yet fully implemented
-    assert len(ensemble.indicators)==1, ensemble.indicators
-    #assert len(ensemble.potts_model['h'])==1, ensemble.potts_model['h']
-    #assert len(ensemble.potts_model['J'])==1, ensemble.potts_model['J']
-    ensemble.indicators = ensemble.indicators[0]
-    #ensemble.potts_model['h'] = ensemble.potts_model['h'][0]
-    #ensemble.potts_model['J'] = ensemble.potts_model['J'][0]
     ensemble.mask = np.ones((63,63))
     #########################################################
 

From c9715d710bbb38f4fafa9d19e18426b085ebf907 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Tue, 8 Jul 2025 20:55:26 -0500
Subject: [PATCH 05/20] changed 'AWSEMEnsemble' class name to 'DecoyEnsemble'

---
 frustratometer/classes/AWSEM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 7c94570..2c1c1db 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -375,7 +375,7 @@ def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
         return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
 
 
-class AWSEMEnsemble(): # don't think it's necessary for this one to inherit from Frustratometer
+class DecoyEnsemble(): # don't think it's necessary for this one to inherit from Frustratometer
                        # also, note that the functions compute_configurational_decoy_statistics,
                        # compute_configurational_energies, and configuration_frustration are
                        # present in the AWSEM class but removed here, since we don't expect to 

From 641dfb997aac9d0ca4a7e333a667a02031920bb7 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Wed, 9 Jul 2025 10:33:53 -0500
Subject: [PATCH 06/20] stripped DecoyEnsemble down to the bare minimum

---
 frustratometer/classes/AWSEM.py    | 65 ++++++++----------------------
 frustratometer/classes/__init__.py |  2 +-
 2 files changed, 17 insertions(+), 50 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 2c1c1db..468dc96 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -7,7 +7,7 @@
 from pydantic.types import Path
 from typing import List,Optional,Union,Generator
 
-__all__ = ['AWSEM','AWSEMEnsemble']
+__all__ = ['AWSEM','DecoyEnsemble']
 
 class AWSEMParameters(BaseModel):
     model_config = ConfigDict(extra='ignore', arbitrary_types_allowed=True)
@@ -379,64 +379,28 @@ class DecoyEnsemble(): # don't think it's necessary for this one to inherit from
                        # also, note that the functions compute_configurational_decoy_statistics,
                        # compute_configurational_energies, and configuration_frustration are
                        # present in the AWSEM class but removed here, since we don't expect to 
-                       # compute frustration on an entire ensemble
-    #Mapping to DCA
-    q = 20
-    aa_map_awsem_list = [0, 0, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 18] #A gap has no energy
-    aa_map_awsem_x, aa_map_awsem_y = np.meshgrid(aa_map_awsem_list, aa_map_awsem_list, indexing='ij')
-    
+                       # compute frustration on an entire ensemble  
     def __init__(self, 
                  pdb_structures: Generator[object,None,None],
                  **parameters)->object:
         """
-        Generate AWSEMEnsemble object
+        Generate DecoyEnsemble object
 
         Parameters
         ----------
         pdb_structures : Generator[object,None,None]
             yields Structure objects representing decoy structures
+        other parameters:
+            masks and cutoffs affecting the AWSEM class's indicator function calculations;
+            they must be the same for all structures; burial_in_context also available, but use at your own risk
         
         Returns
         -------
-        AWSEMEnsemble object, which holds a list of indicator functions for each decoy structure,
-            as calculated by the AWSEM class. 
+        DecoyEnsemble object, which holds a list of 7 numpy arrays, ordered by indicator function type:
+            [low density (rho), medium density (rho), high density (rho), direct, protein, water, electrostatics].
+            The numpy arrays' first axes vary the structure, while the second axis and third axis, if it exists, 
+            hold the (appropriately masked) indicator functions for each residue or pair of residues
         """
-        
-        #Set attributes
-        p = AWSEMParameters(**parameters)
-        if p.min_sequence_separation_contact is None:
-            p.min_sequence_separation_contact = 1
-        if p.min_sequence_separation_rho is None:
-            p.min_sequence_separation_rho = 1
-        if p.min_sequence_separation_electrostatics is None:
-            p.min_sequence_separation_electrostatics = 1
-
-        for field, value in p:
-            setattr(self, field, value)
-        
-        #Gamma parameters
-        if isinstance(p.gamma, Gamma):
-            gamma = p.gamma
-        elif isinstance(p.gamma, Path):
-            gamma = Gamma(p.gamma)
-        else:
-            raise ValueError("Gamma parameter must be a path or a Gamma object.")
-                
-        self.gamma=gamma
-        self.burial_gamma = gamma['Burial'].T
-        self.direct_gamma = gamma['Direct'][0]
-        self.protein_gamma = gamma['Protein'][0]
-        self.water_gamma = gamma['Water'][0]
-        self.burial_in_context=p.burial_in_context # need to be careful here--the same choice will have to apply to all structures,
-                                              # the way this code is currently written
-        #self._decoy_fluctuation = {} # not sure what this does
-        if p.k_electrostatics!=0:
-            self.sequence_cutoff=min(p.min_sequence_separation_electrostatics, p.min_sequence_separation_contact)
-            self.distance_cutoff=None
-        else:
-            self.sequence_cutoff=p.min_sequence_separation_contact
-            self.distance_cutoff=p.distance_cutoff_contact
-       
         burial_low_density_indicators = []
         burial_medium_density_indicators = []   
         burial_high_density_indicators = []
@@ -445,7 +409,10 @@ def __init__(self,
         contact_water_indicators = []
         electrostatics_indicators = []
         for pdb_structure in pdb_structures: 
-            all_indicators = AWSEM(pdb_structure,expose_indicator_functions=True).indicators
+            # the AWSEM class takes care of the indicator calculation (including masking) for us
+            #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
+            #     However, we do need to pass through parameters used to generate the indicator functions
+            all_indicators = AWSEM(pdb_structure, expose_indicator_functions=True, **parameters).indicators
             burial_low_density_indicators.append(all_indicators[0])
             burial_medium_density_indicators.append(all_indicators[1]) 
             burial_high_density_indicators.append(all_indicators[2])
@@ -453,8 +420,8 @@ def __init__(self,
             contact_protein_indicators.append(all_indicators[4])
             contact_water_indicators.append(all_indicators[5])
             electrostatics_indicators.append(all_indicators[6])
-            # indicator function order: burial low density, burial medium density, burial high density,
-            #                           direct, protein, water, electrostatics
+        # the idea here is that we have different conformers of a chain of a particular length;
+        #     if not, we'll get a ValueError when trying to initialize numpy arrays from a ragged set of lists
         self.indicators = [np.array(burial_low_density_indicators),
                            np.array(burial_medium_density_indicators),
                            np.array(contact_direct_indicators),
diff --git a/frustratometer/classes/__init__.py b/frustratometer/classes/__init__.py
index 70dec2d..4ff9b4b 100644
--- a/frustratometer/classes/__init__.py
+++ b/frustratometer/classes/__init__.py
@@ -7,7 +7,7 @@
 """
 
 from .DCA import DCA
-from .AWSEM import AWSEM, AWSEMEnsemble
+from .AWSEM import AWSEM, DecoyEnsemble
 from .Structure import Structure
 from .Map import Map
 from .Gamma import Gamma

From 107d780cd9ac792c5cba2eca7b421cba064044d7 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Wed, 9 Jul 2025 20:33:16 -0500
Subject: [PATCH 07/20] updated call to Structure in test_optimization

---
 tests/test_optimization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_optimization.py b/tests/test_optimization.py
index 7b939a1..2f1414b 100644
--- a/tests/test_optimization.py
+++ b/tests/test_optimization.py
@@ -347,7 +347,7 @@ def test_diff_mean_inner_product_1_by_1(n_elements = 10):
 def model(request):
     native_pdb = "tests/data/1bfz.pdb"
     distance_cutoff_contact, min_sequence_separation_contact, k_electrostatics = request.param
-    structure = Structure.full_pdb(native_pdb, "A")
+    structure = Structure(native_pdb, "A")
     model = AWSEM(structure, distance_cutoff_contact=distance_cutoff_contact, min_sequence_separation_contact=min_sequence_separation_contact, expose_indicator_functions=True, k_electrostatics=k_electrostatics)   
     return model
 

From 2bb3b832b908e12017dada49f3afcb3517660530 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Wed, 9 Jul 2025 22:20:08 -0500
Subject: [PATCH 08/20] refactored, passing most tests but failing some due to
 inaccurate electrostatics energy calculation

---
 frustratometer/classes/AWSEM.py             | 371 +++++++++++++-------
 frustratometer/optimization/optimization.py | 173 ++++++++-
 2 files changed, 405 insertions(+), 139 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 468dc96..38288db 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -47,15 +47,15 @@ class AWSEMParameters(BaseModel):
     k_electrostatics: float = Field(17.3636, description="Coefficient for electrostatic interactions. (kJ/mol)")
     electrostatics_screening_length: float = Field(10, description="Screening length for electrostatic interactions. (Angstrom)")
 
-class AWSEM(Frustratometer):
+class AWSEMBase(Frustratometer):
+
     #Mapping to DCA
     q = 20
     aa_map_awsem_list = [0, 0, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 18] #A gap has no energy
     aa_map_awsem_x, aa_map_awsem_y = np.meshgrid(aa_map_awsem_list, aa_map_awsem_list, indexing='ij')
     
     def __init__(self, 
-                 pdb_structure: object,
-                 sequence: str =None,
+                 sequence: str,
                  expose_indicator_functions: bool=False,
                  **parameters)->object:
         """
@@ -63,10 +63,8 @@ def __init__(self,
 
         Parameters
         ----------
-        pdb_structure : object
-            Structure object generated by Structure class
-        sequence :  str
-            The amino acid sequence of the protein. The sequence is assumed to be in one-letter code. 
+        sequence: str
+            The amino acid sequence
         expose_indicator_functions: bool
             If set to True, indicator functions of the contact and burial energy terms can be accessed by user.
         
@@ -75,16 +73,26 @@ def __init__(self,
         AWSEM object
         """
         
-        #Set parameters attributes
-        self.p = AWSEMParameters(**parameters)
-        if self.p.min_sequence_separation_contact is None:
-            self.p.min_sequence_separation_contact = 1
-        if self.p.min_sequence_separation_rho is None:
-            self.p.min_sequence_separation_rho = 1
-        if self.p.min_sequence_separation_electrostatics is None:
-            self.p.min_sequence_separation_electrostatics = 1
-        for field, value in self.p:
+        # set sequence based on argument
+        self.sequence = sequence
+
+        # set indicator function exposure based on argument
+        #     i guess not exposing indicator functions saves memory?
+        self.expose_indicator_functions = expose_indicator_functions
+
+        # parse other arguments
+        p = AWSEMParameters(**parameters)
+        if p.min_sequence_separation_contact is None:
+            p.min_sequence_separation_contact = 1
+        if p.min_sequence_separation_rho is None:
+            p.min_sequence_separation_rho = 1
+        if p.min_sequence_separation_electrostatics is None:
+            p.min_sequence_separation_electrostatics = 1
+        for field, value in p:
             setattr(self, field, value)
+        self.p = p
+
+        # set gamma
         if isinstance(self.p.gamma, Gamma):
             gamma = self.p.gamma
         elif isinstance(self.p.gamma, Path):
@@ -96,38 +104,117 @@ def __init__(self,
         self.direct_gamma = gamma['Direct'][0]
         self.protein_gamma = gamma['Protein'][0]
         self.water_gamma = gamma['Water'][0]
-        self.burial_in_context=self.p.burial_in_context
 
+        # set other attributes
+        self.burial_in_context = self.p.burial_in_context
+        self.aa_freq = frustration.compute_aa_freq(self.sequence)
+        self.contact_freq = frustration.compute_contact_freq(self.sequence)
+        if self.p.k_electrostatics == 0:
+            self.sequence_cutoff=min(self.p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
+            self.distance_cutoff=None # the distance matrix isn't guaranteed to exist in all subclasses,
+                                      # but it doesn't hurt to define the distance_cutoff attribute--
+                                      # it's just like any other parameter, such as sequence_cutoff,
+                                      # that only matters if we need to compute a mask from a distance matrix 
+        else:
+            self.sequence_cutoff=self.p.min_sequence_separation_contact
+            self.distance_cutoff=self.p.distance_cutoff_contact # the distance matrix isn't guaranteed to exist in all subclasses,
+                                                                # but it doesn't hurt to define the distance_cutoff attribute--
+                                                                # it's just like any other parameter, such as sequence_cutoff,
+                                                                # that only matters if we need to compute a mask from a distance matrix
         # ??????
         self._decoy_fluctuation = {} # don't know what this does
         self.minimally_frustrated_threshold=.78 # this should be a class variable or an argument to __init__
 
-        # sequence details
-        if sequence is None:
-            self.sequence=pdb_structure.sequence
-        else:
-            self.sequence=sequence
-        self.aa_freq = frustration.compute_aa_freq(self.sequence)
-        self.contact_freq = frustration.compute_contact_freq(self.sequence)
+    def setup_model(self):
+        self.calculate_indicators()
+        self.calculate_energy_and_potts()
 
-        # structure details
-        self.expose_indicator_functions = expose_indicator_functions
-        self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict
-        self.pdb_structure = pdb_structure
+    def calculate_indicators(self):
+        raise NotImplementedError("Subclasses must this method")
 
-    
-    # allows us to update coordinates only by passing in a pdb file
-    @property
-    def pdb_structure(self):
-        return self._pdb_structure
-    @pdb_structure.setter
-    def pdb_structure(self,pdb_structure):
+    def calculate_energy_and_potts(self):
+
+        J_index = np.meshgrid(range(self.N), range(self.N), range(self.q), range(self.q), indexing='ij', sparse=False)
+        h_index = np.meshgrid(range(self.N), range(self.q), indexing='ij', sparse=False)
+        
+        # compute burial and contact energies
+        self.burial_energy = 0.5 * self.p.k_contact * self.burial_gamma[h_index[1]] * self.burial_indicator[:, np.newaxis, :]
+        direct = self.direct_indicator * self.direct_gamma[J_index[2], J_index[3]]
+        water_mediated = self.water_indicator * self.water_gamma[J_index[2], J_index[3]]
+        protein_mediated = self.protein_indicator  * self.protein_gamma[J_index[2], J_index[3]]
+        contact_energy = self.p.k_contact * np.array([direct, water_mediated, protein_mediated]) * self.sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
+        
+        # Compute electrostatics and add to contact energy
+        if self.p.k_electrostatics!=0:
+            electrostatics_energy = self.electrostatics_gamma * self.electrostatics_indicator[:,:,np.newaxis,np.newaxis]
+            contact_energy = np.append(contact_energy, electrostatics_energy[np.newaxis,:,:,:,:], axis=0)
+
+        self.contact_energy = contact_energy
+
+        # Compute potts model
+        self.potts_model = {}
+        self.potts_model['h'] = self.burial_energy.sum(axis=-1)[:, self.aa_map_awsem_list]
+        self.potts_model['J'] = self.contact_energy.sum(axis=0)[:, :, self.aa_map_awsem_x, self.aa_map_awsem_y]
+        # Set the gap energy to zero
+        self.potts_model['h'][:, 0] = 0
+        self.potts_model['J'][:, :, 0, :] = 0
+        self.potts_model['J'][:, :, :, 0] = 0
+        self._native_energy=None # don't know what this does
+
+
+    def compute_configurational_decoy_statistics(self):
+        raise NotImplementedError("Subclasses must define this method")
+
+    def compute_configurational_energies(self):
+        raise NotImplementedError("Subclasses must define this method")
+
+    def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
+        mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
+        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
+
+
+
+
+
+class AWSEM(AWSEMBase):
+
+    def __init__(self, 
+                 pdb_structure: object,
+                 sequence: str =None,
+                 expose_indicator_functions: bool=False,
+                 **parameters)->object:
+        # assume the user wanted the sequence from the pdb structure if not given
+        if not sequence:
+            sequence = pdb_structure.sequence
+        # load structure-independent parameters and methods
+        super().__init__(sequence, expose_indicator_functions, **parameters) 
+        # set up strucure
+        self.setup_structure(pdb_structure)
+        # calculate masks
+        if self.burial_in_context==True:
+            selected_matrix=self.full_pdb_distance_matrix
+        else:
+            selected_matrix=self.distance_matrix
+        self.sequence_mask_rho = frustration.compute_mask(selected_matrix, 
+                                                     maximum_contact_distance=None, 
+                                                     minimum_sequence_separation = self.p.min_sequence_separation_rho)
+        self.sequence_mask_contact = frustration.compute_mask(self.distance_matrix, 
+                                                     maximum_contact_distance=self.p.distance_cutoff_contact, 
+                                                     minimum_sequence_separation = self.p.min_sequence_separation_contact)
+        self.electrostatics_mask = frustration.compute_mask(self.distance_matrix, 
+                                                     maximum_contact_distance=None, 
+                                                     minimum_sequence_separation=self.p.min_sequence_separation_electrostatics)
+        self.mask = frustration.compute_mask(self.distance_matrix, 
+                                             maximum_contact_distance=self.distance_cutoff, 
+                                             minimum_sequence_separation = self.sequence_cutoff)
+        self.selected_matrix = selected_matrix # we'll need this in the calculate_indicators function 
+        self.setup_model()
+
+    def setup_structure(self, pdb_structure):
         # check structure
         selection_CB = pdb_structure.structure.select('name CB or (resname GLY IGL and name CA)')
         resid = selection_CB.getResindices()
         N=len(resid)
-        if N != len(self.sequence):
-            raise ValueError("The pdb is incomplete. Try setting 'repair_pdb=True' when constructing the Structure object.")
         self.resid = resid
         self.N = N
         # set structure-dependent proterties
@@ -136,34 +223,38 @@ def pdb_structure(self,pdb_structure):
         self.chain=pdb_structure.chain
         self.pdb_file=pdb_structure.pdb_file
         self.init_index_shift=pdb_structure.init_index_shift
+        self.full_to_aligned_index_dict=pdb_structure.full_to_aligned_index_dict
         self.distance_matrix=pdb_structure.distance_matrix
         self.full_pdb_distance_matrix=pdb_structure.full_pdb_distance_matrix
-        # reset indicator functions, energies, and potts model
-        self.compute_indicators_energies_potts_model()
 
-    def compute_indicators_energies_potts_model(self):
-        if self.burial_in_context==True:
-            selected_matrix=self.full_pdb_distance_matrix
-        else:
-            selected_matrix=self.distance_matrix
-        sequence_mask_rho = frustration.compute_mask(selected_matrix, 
-                                                     maximum_contact_distance=None, 
-                                                     minimum_sequence_separation = self.p.min_sequence_separation_rho)
-        sequence_mask_contact = frustration.compute_mask(self.distance_matrix, 
-                                                     maximum_contact_distance=self.p.distance_cutoff_contact, 
-                                                     minimum_sequence_separation = self.p.min_sequence_separation_contact)
+    @property
+    def pdb_structure(self):
+        return self._pdb_structure
+    @pdb_structure.setter
+    def pdb_structure(self,pdb_structure):
+        # reset structural attributes
+        self.setup_structure(pdb_structure)
+        # check that our new structure is compatible with our old one
+        if self.N != len(self.sequence):
+            raise ValueError("The pdb is incomplete. Try setting 'repair_pdb=True' when constructing the Structure object.")
+        self.calculate_indicators()
+    def change_conformation(alternative_pdb_structure):
+        # this function is an alias for the pdb_structure setter
+        self.pdb_structure = alternative_pdb_structure
+
+    def calculate_indicators(self):
         # Calculate rho
         rho = 0.25 
-        rho *= (1 + np.tanh(self.p.eta * (selected_matrix- self.p.r_min)))
-        rho *= (1 + np.tanh(self.p.eta * (self.p.r_max - selected_matrix)))
-        rho *= sequence_mask_rho
+        rho *= (1 + np.tanh(self.p.eta * (self.selected_matrix - self.p.r_min)))
+        rho *= (1 + np.tanh(self.p.eta * (self.p.r_max - self.selected_matrix)))
+        rho *= self.sequence_mask_rho
         self.rho=rho
         #Calculate sigma water
         rho_r = (rho).sum(axis=1)
         if self.full_pdb_distance_matrix.shape!=self.distance_matrix.shape:
             if self.burial_in_context==True:
-                self.init_index_shift=pdb_structure.init_index_shift
-                self.fin_index_shift=pdb_structure.fin_index_shift
+                self.init_index_shift=self.pdb_structure.init_index_shift
+                self.fin_index_shift=self.pdb_structure.fin_index_shift
                 rho_r=rho_r[self.init_index_shift:self.fin_index_shift]
         self.rho_r=rho_r
         rho_b = np.expand_dims(rho_r, 1)
@@ -179,81 +270,54 @@ def compute_indicators_energies_potts_model(self):
         water_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_water[:, :, np.newaxis, np.newaxis]
         protein_indicator = thetaII[:, :, np.newaxis, np.newaxis] * sigma_protein[:, :, np.newaxis, np.newaxis]
         # store indicators and gammas for our particular sequence as attributes
-        if self.expose_indicator_functions:
-            self.indicators=[]
-            self.indicators.append(burial_indicator[:,0])
-            self.indicators.append(burial_indicator[:,1])
-            self.indicators.append(burial_indicator[:,2])
-            self.indicators.append(direct_indicator[:,:,0,0]*sequence_mask_contact)
-            self.indicators.append(protein_indicator[:,:,0,0]*sequence_mask_contact)
-            self.indicators.append(water_indicator[:,:,0,0]*sequence_mask_contact)
-            
-            self.gamma_array=[]
-            temp_burial_gamma=self.burial_gamma[self.aa_map_awsem_list]
-            temp_burial_gamma[0]=0
-            temp_burial_gamma *= -0.5 * self.p.k_contact
-            self.gamma_array.append(temp_burial_gamma[:,0])
-            self.gamma_array.append(temp_burial_gamma[:,1])
-            self.gamma_array.append(temp_burial_gamma[:,2])
-
-            for contact_gamma in [self.direct_gamma, self.protein_gamma, self.water_gamma]:
-                temp_gamma = contact_gamma[self.aa_map_awsem_x, self.aa_map_awsem_y].copy()
-                temp_gamma[0, :] = 0
-                temp_gamma[:, 0] = 0
-                temp_gamma *= -0.5 * self.k_contact
-                self.gamma_array.append(temp_gamma)
-
-            self.burial_indicator = burial_indicator
-            self.direct_indicator = direct_indicator
-            self.water_indicator = water_indicator
-            self.protein_indicator = protein_indicator
-            
-        
-        J_index = np.meshgrid(range(self.N), range(self.N), range(self.q), range(self.q), indexing='ij', sparse=False)
-        h_index = np.meshgrid(range(self.N), range(self.q), indexing='ij', sparse=False)
-        
-        # compute burial and contact energies
-        self.burial_energy = 0.5 * self.p.k_contact * self.burial_gamma[h_index[1]] * burial_indicator[:, np.newaxis, :]
-        direct = direct_indicator * self.direct_gamma[J_index[2], J_index[3]]
-        water_mediated = water_indicator * self.water_gamma[J_index[2], J_index[3]]
-        protein_mediated = protein_indicator  * self.protein_gamma[J_index[2], J_index[3]]
-        contact_energy = self.p.k_contact * np.array([direct, water_mediated, protein_mediated]) * sequence_mask_contact[np.newaxis, :, :, np.newaxis, np.newaxis]
-        # Compute electrostatics and add to contact energy
-        if self.p.k_electrostatics!=0:
-            self.sequence_cutoff=min(self.p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
-            self.distance_cutoff=None
-            electrostatics_mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=None, minimum_sequence_separation=self.p.min_sequence_separation_electrostatics)
+        self.indicators=[]
+        self.indicators.append(burial_indicator[:,0])
+        self.indicators.append(burial_indicator[:,1])
+        self.indicators.append(burial_indicator[:,2])
+        self.indicators.append(direct_indicator[:,:,0,0]*self.sequence_mask_contact)
+        self.indicators.append(protein_indicator[:,:,0,0]*self.sequence_mask_contact)
+        self.indicators.append(water_indicator[:,:,0,0]*self.sequence_mask_contact)
+        self.gamma_array=[]
+        temp_burial_gamma=self.burial_gamma[self.aa_map_awsem_list]
+        temp_burial_gamma[0]=0
+        temp_burial_gamma *= -0.5 * self.p.k_contact
+        self.gamma_array.append(temp_burial_gamma[:,0])
+        self.gamma_array.append(temp_burial_gamma[:,1])
+        self.gamma_array.append(temp_burial_gamma[:,2])
+        for contact_gamma in [self.direct_gamma, self.protein_gamma, self.water_gamma]:
+            temp_gamma = contact_gamma[self.aa_map_awsem_x, self.aa_map_awsem_y].copy()
+            temp_gamma[0, :] = 0
+            temp_gamma[:, 0] = 0
+            temp_gamma *= -0.5 * self.k_contact
+            self.gamma_array.append(temp_gamma)
+        self.burial_indicator = burial_indicator # probably could get rid of either this or indicators list
+        self.direct_indicator = direct_indicator # probably could get rid of either this or indicators list
+        self.water_indicator = water_indicator   # probably could get rid of either this or indicators list
+        self.protein_indicator = protein_indicator # probably could get rid of either this or indicators list
+        if self.p.k_electrostatics != 0:
             # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
             charges = np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
             charges2 = charges[:,np.newaxis]*charges[np.newaxis,:]
-            electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / self.p.electrostatics_screening_length) * electrostatics_mask
-            electrostatics_energy = -self.p.k_electrostatics * (charges2[np.newaxis,np.newaxis,:,:]*electrostatics_indicator[:,:,np.newaxis,np.newaxis])
-            contact_energy = np.append(contact_energy, electrostatics_energy[np.newaxis,:,:,:,:], axis=0)
-            if self.expose_indicator_functions:
-                self.indicators.append(electrostatics_indicator)
-                temp_gamma=0.5 * self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
-                temp_gamma[0,:]=0
-                temp_gamma[:,0]=0
-                self.gamma_array.append(temp_gamma)
-        else:
-            self.sequence_cutoff=self.p.min_sequence_separation_contact
-            self.distance_cutoff=self.p.distance_cutoff_contact
-        self.mask = frustration.compute_mask(self.distance_matrix, maximum_contact_distance=self.distance_cutoff, minimum_sequence_separation = self.sequence_cutoff)
-        self.contact_energy = contact_energy
-
-        # Compute potts model
-        self.potts_model = {}
-        self.potts_model['h'] = self.burial_energy.sum(axis=-1)[:, self.aa_map_awsem_list]
-        self.potts_model['J'] = self.contact_energy.sum(axis=0)[:, :, self.aa_map_awsem_x, self.aa_map_awsem_y]
-        # Set the gap energy to zero
-        self.potts_model['h'][:, 0] = 0
-        self.potts_model['J'][:, :, 0, :] = 0
-        self.potts_model['J'][:, :, :, 0] = 0
-        self._native_energy=None # don't know what this does
-
-    def change_conformation(alternative_pdb_structure):
-        # this function is an alias for the pdb_structure setter
-        self.pdb_structure = alternative_pdb_structure
+            electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / self.p.electrostatics_screening_length) * self.electrostatics_mask
+            self.indicators.append(electrostatics_indicator)
+            self.electrostatics_indicator = electrostatics_indicator # probably could get rid of either this or indicators list
+            self.electrostatics_gamma = -self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y][1:,1:]
+            temp_gamma = 0.5 * self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
+            temp_gamma[0,:]=0
+            temp_gamma[:,0]=0
+            self.gamma_array.append(temp_gamma)
+
+    def calculate_energy_and_potts(self):
+        super().calculate_energy_and_potts()
+        if not self.expose_indicator_functions:
+            del self.burial_indicator
+            del self.direct_indicator
+            del self.water_indicator
+            del self.protein_indicator
+            if "electrostatics_indicator" in dir(self):
+                # won't exist if electrostatics are turned off
+                del self.electrostatics_indicator
+            del self.indicators
 
     def compute_configurational_decoy_statistics(self, n_decoys=4000,aa_freq=None):
         # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
@@ -370,9 +434,37 @@ def compute_configurational_energies(self):
         # import pandas as pd
         return configurational_energies #, pd.DataFrame(decoy_data, columns=decoy_data_columns)
     
-    def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
-        mean_decoy_energy, std_decoy_energy = self.compute_configurational_decoy_statistics(n_decoys=n_decoys,aa_freq=aa_freq)
-        return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
+
+class AWSEMIndicators(AWSEMBase):
+
+    def __init__(self, 
+                 indicators: list,
+                 sequence: str,          # sequence is optional if we initialize from a Structure but not here
+                 expose_indicator_functions: bool=False,
+                 **parameters)->object:
+        """
+        A stripped-down version of the AWSEM class that can be initialized from a set of indicator functions
+
+        Parameters
+        ----------
+        indicators : list
+            List of numpy.ndarray holding the indicator functions, with different decoys stacked along axis 0
+        sequence :  str
+            The amino acid sequence of the protein. The sequence is assumed to be in one-letter code. 
+        expose_indicator_functions: bool
+            If set to True, indicator functions of the contact and burial energy terms can be accessed by user.
+        
+        Returns
+        -------
+        AWSEMIndicators object
+
+        """
+        super().__init__(sequence, expose_indicator_functions, **parameters)
+        self.indicators = indicators
+        self.setup_model()
+
+    def calculate_indicators(self):
+        pass # the function was initialized with indicators, so there's nothing to do
 
 
 class DecoyEnsemble(): # don't think it's necessary for this one to inherit from Frustratometer
@@ -408,11 +500,14 @@ def __init__(self,
         contact_protein_indicators = []
         contact_water_indicators = []
         electrostatics_indicators = []
+        # the AWSEM class takes care of the indicator calculation (including masking) for us
+        #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
+        #     However, we do need to pass through parameters used to generate the indicator functions
+        awsem_obj = AWSEM(pdb_structures[0], expose_indicator_functions=True, **parameters)
         for pdb_structure in pdb_structures: 
-            # the AWSEM class takes care of the indicator calculation (including masking) for us
-            #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
-            #     However, we do need to pass through parameters used to generate the indicator functions
-            all_indicators = AWSEM(pdb_structure, expose_indicator_functions=True, **parameters).indicators
+            awsem_obj.pdb_structure = pdb_structures[pdb_structure] # we can use the pdb_structure setter to update structural 
+                                                                    # stuff without fully re-initializing the object
+            all_indicators = awsem_obj.indicators
             burial_low_density_indicators.append(all_indicators[0])
             burial_medium_density_indicators.append(all_indicators[1]) 
             burial_high_density_indicators.append(all_indicators[2])
@@ -428,3 +523,11 @@ def __init__(self,
                            np.array(contact_protein_indicators),
                            np.array(contact_water_indicators),
                            np.array(electrostatics_indicators),]
+        # Although a DecoyEnsemble does not have an energy or frustration in the same sense as an AWSEM,
+        #     it is helpful to attach gamma parameters to the DecoyEnsemble so that DecoyEnergyAverage, etc.
+        #     can read them through class inheritance in the same way that AwsemEnergyAverage, etc. read the gammas
+        #     from the AWSEM class.
+        # We take the gammas directly from the AWSEM that we used to get our indicators.
+        #     Technically, we generated one AWSEM for each pdb_structure, but the parameters are identical,
+        #     so we can for convenience read the gammas from the most recently initialized awsem_obj
+        self.gamma_array = awsem_obj.gamma_array
diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index 2ecf44c..6ce30a8 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -7,7 +7,7 @@
 
 from frustratometer.classes import Frustratometer
 from frustratometer.classes import Structure
-from frustratometer.classes import AWSEM, AWSEMEnsemble
+from frustratometer.classes import AWSEM, DecoyEnsemble
 from frustratometer.optimization.EnergyTerm import EnergyTerm
 from frustratometer.optimization.inner_product import compute_all_region_means
 from frustratometer.optimization.inner_product import build_mean_inner_product_matrix
@@ -512,6 +512,171 @@ def regression_test(self, seq_index):
         energy=self.compute_energy(seq_index)
         assert np.isclose(energy,expected_energy), f"Expected energy {expected_energy} but got {energy}"
 
+class EnsembleEnergyStatistics(EnergyTerm):
+    """
+    An abstract class (but not using the ABC framework) for ensemble statistics that are distributive in the gammas,
+    meaning the quantity that we want to calculate, (<gammas * indicators>_decoys), is equal to (gammas * <indicators>_decoys).
+    This allows us to average the indicator functions over all decoys just once, and then conduct the MC search as we would for 
+    a single sequence. 
+    
+    When this class was initially written, the intended use cases are for EnsembleEnergyAverage and EnsembleEnergyStd. 
+    These classes just need to call the EnsembleEnergyStatistics __init__ and define the collapse_indicators function.
+    """
+    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
+        #
+        # boilerplate from Awsem*Average classes
+        if not type(model)==DecoyEnsemble:
+            raise TypeError("EnsembleEnergyAverage may only be initialized from a DecoyEnsemble")
+        self.model=model
+        self._use_numba=use_numba
+        self.alphabet=alphabet
+        self.reindex_dca=[_AA.index(aa) for aa in alphabet]
+        self.alphabet_size=len(alphabet)
+        #TODO: Fix the gamma matrix to account for elecrostatics-- i think this is already done!
+        self.gamma = np.concatenate([(a[self.reindex_dca].ravel() if len(a.shape)==1 else a[self.reindex_dca][:,self.reindex_dca].ravel()) for a in model.gamma_array])
+        #
+        # collapse over decoys (axis 0 of our indicator arrays)
+        self.indicators = self.collapse_indicators(model.indicators)
+
+        self.initialize_functions()
+    
+    def initialize_functions(self):
+        len_alphabet=self.alphabet_size
+        gamma=self.gamma
+        
+        # Precompute the mean of the indicators
+        means = [np.average(indicator_type, axis=-1) for indicator_type in self.indicators]
+        
+        def compute_energy(seq_index):
+            counts = np.zeros(len_alphabet, dtype=np.int64)
+            for val in seq_index:
+                counts[val] += 1
+            
+            # Calculate phi_mean
+            phi_mean = np.zeros(len_alphabet*len_indicators1D + len_alphabet**2*len_indicators2D)
+            
+            # 1D indicators
+            c=0
+            for i in range(len_indicators1D):
+                for j in range(len_alphabet):
+                    phi_mean[c] = indicator_means[i] * counts[j]
+                    c += 1
+
+            # 2D indicators
+            for i in range(len_indicators2D):
+                for j in range(len_alphabet):
+                    for k in range(len_alphabet):
+                        t=1 if j==k else 0
+                        phi_mean[c] = indicator_means[i+ len_indicators1D] * counts[j] * (counts[k] - t)
+                        c += 1
+
+            # Calculate energy
+            energy = 0
+            for i in range(phi_len):
+                energy += gamma[i] * phi_mean[i]
+            
+            return energy
+        
+        def denergy_mutation(seq_index, pos, aa):
+            counts = np.zeros(len_alphabet, dtype=np.int64)
+            for val in seq_index:
+                counts[val] += 1
+            aa_old=seq_index[pos]
+            if aa_old==aa:
+                return 0.
+            
+            # Calculate phi_mean
+
+            dphi_mean = np.zeros(len_alphabet*len_indicators1D + len_alphabet**2*len_indicators2D)
+            
+            # 1D indicators
+            for i in range(len_indicators1D):
+                dphi_mean[i*len_alphabet + aa_old] -= indicator_means[i]
+                dphi_mean[i*len_alphabet + aa] += indicator_means[i]
+
+            offset = len_alphabet*len_indicators1D
+            for i in range(len_indicators2D):
+                for j in range(len_alphabet):
+                    k=aa_old
+                    if j==k:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] -= 2 * indicator_means[i + len_indicators1D] *  (counts[j]-1)
+                    elif j==aa:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i+ len_indicators1D] * (counts[k] - counts[j] -1)
+                    else:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] -= indicator_means[i + len_indicators1D] * counts[j]
+                        dphi_mean[offset + i*len_alphabet**2 + k*len_alphabet + j] -= indicator_means[i + len_indicators1D] * counts[j]                    
+                    k=aa
+                    if j==k:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += 2 * indicator_means[i + len_indicators1D] *  counts[j]
+                    elif j==aa_old:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i+ len_indicators1D] * (counts[j] - counts[k] -1)
+                    else:
+                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i + len_indicators1D] * counts[j]
+                        dphi_mean[offset + i*len_alphabet**2 + k*len_alphabet + j] += indicator_means[i + len_indicators1D] * counts[j]
+            
+            # Calculate energy
+            denergy = 0
+            for i in range(phi_len):
+                denergy += gamma[i] * dphi_mean[i]
+            
+            return denergy
+        
+        self.compute_energy = compute_energy
+        self.compute_denergy_mutation = denergy_mutation
+
+        awsem_energy = AwsemEnergy(use_numba=self.use_numba, model=self.model, alphabet=self.alphabet).energy_function
+
+        def compute_energy_sample(seq_index,n_decoys=100000):
+            """ Function to compute the variance of the energy of permutations of a sequence using random shuffling.
+                This function is much faster than compute_energy_permutation but is an approximation"""
+            energies=np.zeros(n_decoys)
+            shuffled_index=seq_index.copy()
+            for i in numba.prange(n_decoys):
+                energies[i]=awsem_energy(shuffled_index[np.random.permutation(len(shuffled_index))])
+            return np.mean(energies)
+
+        def compute_energy_permutation(seq_index):
+            """ Function to compute the variance of the energy of all permutations of a sequence 
+                Caution: This function is very slow for normal sequences """
+            from itertools import permutations
+            decoy_sequences = np.array(list(permutations(seq_index)))
+            energies=np.zeros(len(decoy_sequences))
+            for i in numba.prange(len(decoy_sequences)):
+                energies[i]=awsem_energy(decoy_sequences[i])
+            return np.mean(energies)
+
+        def collapse_indicators(uncollapsed_indicators):
+            raise NotImplementedError("EnsembleEnergyStatistics is an abstract class. Use a subclass implementing this function.")
+        
+        self.compute_energy_sample=self.numbify(compute_energy_sample,parallel=True)
+        self.compute_energy_permutation=compute_energy_permutation
+
+    def regression_test(self, seq_index):
+        expected_energy=self.compute_energy_permutation(seq_index)
+        energy=self.compute_energy(seq_index)
+        assert np.isclose(energy,expected_energy), f"Expected energy {expected_energy} but got {energy}"
+
+class EnsembleEnergyAverage(EnsembleEnergyStatistics):
+    """
+    See documentation for EnsembleEnergyStatistics
+    """
+    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
+        super().__init__(model, use_numba, alphabet)
+
+    def collapse_indicators(uncollapsed_indicators):
+        self.indicators = [np.average(indicator, axis=0) for indicator in uncollapsed_indicators]
+
+class EnsembleEnergyStd(EnsembleEnergyStatistics):
+    """
+    See documentation for EnsembleEnergyStatistics
+    """
+    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
+        super().__init__(model, use_numba, alphabet)
+
+    def collapse_indicators(uncollapsed_indicators):
+        self.indicators = [np.std(indicator, axis=0) for indicator in uncollapsed_indicators]
+
+
 class AwsemEnergyVariance(EnergyTerm):   
     def __init__(self, model:Frustratometer, use_numba=True, alphabet=_AA):
         self._use_numba=use_numba
@@ -1111,10 +1276,8 @@ def find_optimal_replicas(self, max_replicas=32, n_repeats=5, n_steps=10000):
 
     pdb_list = ["tests/data/1r69.pdb"]
     pdb_structures = (Structure(pdb, chain=None) for pdb in pdb_list)
-    ensemble = AWSEMEnsemble(pdb_structures, distance_cutoff_contact=10, min_sequence_separation_contact=2, expose_indicator_functions=True)
-    ###########################################################
-    ensemble.mask = np.ones((63,63))
-    #########################################################
+    ensemble = DecoyEnsemble(pdb_structures, distance_cutoff_contact=10, min_sequence_separation_contact=10)
+
 
     reduced_alphabet = 'ADEFGHIKLMNQRSTVWY'
 

From d0b23f35b20c58ded1bd80eb9f47c5487788dedd Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Thu, 10 Jul 2025 13:19:41 -0500
Subject: [PATCH 09/20] tests passing now (except for the dca tests requiring
 hmmer, which have been failing for a while)

---
 frustratometer/classes/AWSEM.py             | 45 ++++++++++++++++++---
 frustratometer/optimization/optimization.py | 11 ++---
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 38288db..1462754 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -7,7 +7,7 @@
 from pydantic.types import Path
 from typing import List,Optional,Union,Generator
 
-__all__ = ['AWSEM','DecoyEnsemble']
+__all__ = ['AWSEM','AWSEMIndicators','DecoyEnsemble']
 
 class AWSEMParameters(BaseModel):
     model_config = ConfigDict(extra='ignore', arbitrary_types_allowed=True)
@@ -74,6 +74,7 @@ def __init__(self,
         """
         
         # set sequence based on argument
+        self.N = len(sequence)
         self.sequence = sequence
 
         # set indicator function exposure based on argument
@@ -109,7 +110,7 @@ def __init__(self,
         self.burial_in_context = self.p.burial_in_context
         self.aa_freq = frustration.compute_aa_freq(self.sequence)
         self.contact_freq = frustration.compute_contact_freq(self.sequence)
-        if self.p.k_electrostatics == 0:
+        if self.p.k_electrostatics != 0:
             self.sequence_cutoff=min(self.p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
             self.distance_cutoff=None # the distance matrix isn't guaranteed to exist in all subclasses,
                                       # but it doesn't hurt to define the distance_cutoff attribute--
@@ -204,9 +205,14 @@ def __init__(self,
         self.electrostatics_mask = frustration.compute_mask(self.distance_matrix, 
                                                      maximum_contact_distance=None, 
                                                      minimum_sequence_separation=self.p.min_sequence_separation_electrostatics)
+        with open('my_data.txt','w') as f:
+            f.write(f"self.distance_cutoff: {self.distance_cutoff}\n")
+            f.write(f"self.sequence_cutoff: {self.sequence_cutoff}\n")
+        np.save('my_distance_matrix.npy',self.distance_matrix)
         self.mask = frustration.compute_mask(self.distance_matrix, 
                                              maximum_contact_distance=self.distance_cutoff, 
                                              minimum_sequence_separation = self.sequence_cutoff)
+        np.save('my_mask_new.npy',self.mask)
         self.selected_matrix = selected_matrix # we'll need this in the calculate_indicators function 
         self.setup_model()
 
@@ -301,7 +307,7 @@ def calculate_indicators(self):
             electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / self.p.electrostatics_screening_length) * self.electrostatics_mask
             self.indicators.append(electrostatics_indicator)
             self.electrostatics_indicator = electrostatics_indicator # probably could get rid of either this or indicators list
-            self.electrostatics_gamma = -self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y][1:,1:]
+            self.electrostatics_gamma = -self.p.k_electrostatics * charges2[np.newaxis, np.newaxis, :, :]
             temp_gamma = 0.5 * self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
             temp_gamma[0,:]=0
             temp_gamma[:,0]=0
@@ -439,6 +445,10 @@ class AWSEMIndicators(AWSEMBase):
 
     def __init__(self, 
                  indicators: list,
+                 burial_indicator,
+                 direct_indicator,
+                 protein_indicator,
+                 water_indicator,
                  sequence: str,          # sequence is optional if we initialize from a Structure but not here
                  expose_indicator_functions: bool=False,
                  **parameters)->object:
@@ -460,7 +470,12 @@ def __init__(self,
 
         """
         super().__init__(sequence, expose_indicator_functions, **parameters)
+        self.N
         self.indicators = indicators
+        self.burial_indicator = burial_indicator
+        self.direct_indicator = direct_indicator
+        self.protein_indicator = protein_indicator
+        self.water_indicator = water_indicator
         self.setup_model()
 
     def calculate_indicators(self):
@@ -500,13 +515,18 @@ def __init__(self,
         contact_protein_indicators = []
         contact_water_indicators = []
         electrostatics_indicators = []
+        burial_indicators_other = []
+        direct_indicators_other = []
+        protein_indicators_other = []
+        water_indicators_other = []
+        electrostatics_indicators_other = []
         # the AWSEM class takes care of the indicator calculation (including masking) for us
         #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
         #     However, we do need to pass through parameters used to generate the indicator functions
-        awsem_obj = AWSEM(pdb_structures[0], expose_indicator_functions=True, **parameters)
+        awsem_obj = AWSEM(next(pdb_structures), expose_indicator_functions=True, **parameters)
         for pdb_structure in pdb_structures: 
-            awsem_obj.pdb_structure = pdb_structures[pdb_structure] # we can use the pdb_structure setter to update structural 
-                                                                    # stuff without fully re-initializing the object
+            awsem_obj.pdb_structure = pdb_structure # we can use the pdb_structure setter to update structural 
+                                                    # stuff without fully re-initializing the object
             all_indicators = awsem_obj.indicators
             burial_low_density_indicators.append(all_indicators[0])
             burial_medium_density_indicators.append(all_indicators[1]) 
@@ -515,6 +535,11 @@ def __init__(self,
             contact_protein_indicators.append(all_indicators[4])
             contact_water_indicators.append(all_indicators[5])
             electrostatics_indicators.append(all_indicators[6])
+            burial_indicators_other.append(awsem_obj.electrostatics_indicator)
+            direct_indicators_other.append(awsem_obj.electrostatics_indicator)
+            protein_indicators_other.append(awsem_obj.electrostatics_indicator)
+            water_indicators_other.append(awsem_obj.electrostatics_indicator)
+            electrostatics_indicators_other.append(awsem_obj.electrostatics_indicator)
         # the idea here is that we have different conformers of a chain of a particular length;
         #     if not, we'll get a ValueError when trying to initialize numpy arrays from a ragged set of lists
         self.indicators = [np.array(burial_low_density_indicators),
@@ -523,6 +548,11 @@ def __init__(self,
                            np.array(contact_protein_indicators),
                            np.array(contact_water_indicators),
                            np.array(electrostatics_indicators),]
+        self.burial_indicator = np.average(np.array(burial_indicators_other),axis=0)
+        self.direct_indicator = np.average(np.array(direct_indicators_other),axis=0)
+        self.protein_indicator = np.average(np.array(protein_indicators_other),axis=0)
+        self.water_indicator = np.average(np.array(water_indicators_other),axis=0)
+        self.electrostatics_indicator = np.average(np.array(electrostatics_indicators_other),axis=0)
         # Although a DecoyEnsemble does not have an energy or frustration in the same sense as an AWSEM,
         #     it is helpful to attach gamma parameters to the DecoyEnsemble so that DecoyEnergyAverage, etc.
         #     can read them through class inheritance in the same way that AwsemEnergyAverage, etc. read the gammas
@@ -531,3 +561,6 @@ def __init__(self,
         #     Technically, we generated one AWSEM for each pdb_structure, but the parameters are identical,
         #     so we can for convenience read the gammas from the most recently initialized awsem_obj
         self.gamma_array = awsem_obj.gamma_array
+
+    def average(self):
+        return [np.average(indicator, axis=0) for indicator in self.indicators], self.burial_indicator, self.direct_indicator, self.protein_indicator, self.water_indicator
\ No newline at end of file
diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index 6ce30a8..bc2abe2 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -7,7 +7,7 @@
 
 from frustratometer.classes import Frustratometer
 from frustratometer.classes import Structure
-from frustratometer.classes import AWSEM, DecoyEnsemble
+from frustratometer.classes import AWSEM, AWSEMIndicators, DecoyEnsemble
 from frustratometer.optimization.EnergyTerm import EnergyTerm
 from frustratometer.optimization.inner_product import compute_all_region_means
 from frustratometer.optimization.inner_product import build_mean_inner_product_matrix
@@ -1274,14 +1274,15 @@ def find_optimal_replicas(self, max_replicas=32, n_repeats=5, n_steps=10000):
 
 if __name__ == '__main__':
 
-    pdb_list = ["tests/data/1r69.pdb"]
+    pdb_list = ["tests/data/1r69.pdb","tests/data/1r69.pdb","tests/data/1r69.pdb"]
     pdb_structures = (Structure(pdb, chain=None) for pdb in pdb_list)
-    ensemble = DecoyEnsemble(pdb_structures, distance_cutoff_contact=10, min_sequence_separation_contact=10)
-
+    ensemble = DecoyEnsemble(pdb_structures,  distance_cutoff_contact=10, min_sequence_separation_contact=10)
+    indicators, burial_indicators, direct_indicators, protein_indicators, water_indicators = ensemble.average()
+    average = AWSEMIndicators(indicators, burial_indicators, direct_indicators, protein_indicators, water_indicators,"SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLNGT")
 
     reduced_alphabet = 'ADEFGHIKLMNQRSTVWY'
 
-    awsem_energy = AwsemEnergy(ensemble, alphabet=reduced_alphabet)
+    awsem_energy = AwsemEnergy(average, alphabet=reduced_alphabet)
     heterogeneity = Heterogeneity(exact=False, use_numba=True)
 
     energy_mix = awsem_energy - 10*heterogeneity

From 9fe70c2c1f7f5e4ce74683fce7e2f9da729651bb Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Thu, 10 Jul 2025 17:10:59 -0500
Subject: [PATCH 10/20] tests still passing (except for that hmmer issue) and
 AWSEMIndicators/DecoyEnsemble now working as expected

---
 frustratometer/classes/AWSEM.py             | 100 ++++++++------------
 frustratometer/classes/__init__.py          |   2 +-
 frustratometer/optimization/optimization.py |   7 +-
 3 files changed, 45 insertions(+), 64 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 1462754..00358d0 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -37,7 +37,6 @@ class AWSEMParameters(BaseModel):
     r_maxII: float = Field(9.5, description="Maximum distance for mediated contact potential. (Angstrom)")
     eta_sigma: float = Field(7.0, description="Sharpness of the density-based switching function between protein-mediated and water-mediated contacts.")
     
-
     #Membrane
     membrane_gamma: Union[Path,Gamma] = Field(_path/'data'/'AWSEM_membrane_2015.json', description="File or Gamma object containing the membrane Gamma values (for membrane proteins)")
     eta_switching: int = Field(10, description="Switching distance for the membrane switching function")
@@ -46,6 +45,8 @@ class AWSEMParameters(BaseModel):
     min_sequence_separation_electrostatics: Optional[int] = Field(1, description="Minimum sequence separation for electrostatics calculation.")
     k_electrostatics: float = Field(17.3636, description="Coefficient for electrostatic interactions. (kJ/mol)")
     electrostatics_screening_length: float = Field(10, description="Screening length for electrostatic interactions. (Angstrom)")
+    charges: np.array = Field(np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]), description="Charge on each residue type")
+    # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
 
 class AWSEMBase(Frustratometer):
 
@@ -110,18 +111,21 @@ def __init__(self,
         self.burial_in_context = self.p.burial_in_context
         self.aa_freq = frustration.compute_aa_freq(self.sequence)
         self.contact_freq = frustration.compute_contact_freq(self.sequence)
+        charges2 = self.p.charges[:,np.newaxis] * self.p.charges[np.newaxis,:]
         if self.p.k_electrostatics != 0:
             self.sequence_cutoff=min(self.p.min_sequence_separation_electrostatics, self.p.min_sequence_separation_contact)
             self.distance_cutoff=None # the distance matrix isn't guaranteed to exist in all subclasses,
                                       # but it doesn't hurt to define the distance_cutoff attribute--
                                       # it's just like any other parameter, such as sequence_cutoff,
                                       # that only matters if we need to compute a mask from a distance matrix 
+            self.electrostatics_gamma = -self.p.k_electrostatics * charges2[np.newaxis, np.newaxis, :, :]
         else:
             self.sequence_cutoff=self.p.min_sequence_separation_contact
             self.distance_cutoff=self.p.distance_cutoff_contact # the distance matrix isn't guaranteed to exist in all subclasses,
                                                                 # but it doesn't hurt to define the distance_cutoff attribute--
                                                                 # it's just like any other parameter, such as sequence_cutoff,
                                                                 # that only matters if we need to compute a mask from a distance matrix
+        self.charges2 = charges2 
         # ??????
         self._decoy_fluctuation = {} # don't know what this does
         self.minimally_frustrated_threshold=.78 # this should be a class variable or an argument to __init__
@@ -301,14 +305,10 @@ def calculate_indicators(self):
         self.water_indicator = water_indicator   # probably could get rid of either this or indicators list
         self.protein_indicator = protein_indicator # probably could get rid of either this or indicators list
         if self.p.k_electrostatics != 0:
-            # ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
-            charges = np.array([0, 1, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
-            charges2 = charges[:,np.newaxis]*charges[np.newaxis,:]
             electrostatics_indicator = 1 / (self.distance_matrix + 1E-6) * np.exp(-self.distance_matrix / self.p.electrostatics_screening_length) * self.electrostatics_mask
             self.indicators.append(electrostatics_indicator)
             self.electrostatics_indicator = electrostatics_indicator # probably could get rid of either this or indicators list
-            self.electrostatics_gamma = -self.p.k_electrostatics * charges2[np.newaxis, np.newaxis, :, :]
-            temp_gamma = 0.5 * self.p.k_electrostatics * charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
+            temp_gamma = 0.5 * self.p.k_electrostatics * self.charges2[self.aa_map_awsem_x, self.aa_map_awsem_y]
             temp_gamma[0,:]=0
             temp_gamma[:,0]=0
             self.gamma_array.append(temp_gamma)
@@ -444,11 +444,11 @@ def compute_configurational_energies(self):
 class AWSEMIndicators(AWSEMBase):
 
     def __init__(self, 
-                 indicators: list,
                  burial_indicator,
                  direct_indicator,
                  protein_indicator,
                  water_indicator,
+                 electrostatics_indicator,
                  sequence: str,          # sequence is optional if we initialize from a Structure but not here
                  expose_indicator_functions: bool=False,
                  **parameters)->object:
@@ -470,23 +470,23 @@ def __init__(self,
 
         """
         super().__init__(sequence, expose_indicator_functions, **parameters)
-        self.N
-        self.indicators = indicators
         self.burial_indicator = burial_indicator
         self.direct_indicator = direct_indicator
         self.protein_indicator = protein_indicator
         self.water_indicator = water_indicator
+        self.electrostatics_indicator = electrostatics_indicator
+        self.sequence_mask_contact = np.full((self.N,self.N), True) 
+        self.mask = np.full((self.N,self.N), True) 
+        # mask should have been applied when calculating the indicator functions,
+        #     so we set it such that no further masking is performed
         self.setup_model()
 
     def calculate_indicators(self):
         pass # the function was initialized with indicators, so there's nothing to do
 
 
-class DecoyEnsemble(): # don't think it's necessary for this one to inherit from Frustratometer
-                       # also, note that the functions compute_configurational_decoy_statistics,
-                       # compute_configurational_energies, and configuration_frustration are
-                       # present in the AWSEM class but removed here, since we don't expect to 
-                       # compute frustration on an entire ensemble  
+class DecoyEnsemble():
+
     def __init__(self, 
                  pdb_structures: Generator[object,None,None],
                  **parameters)->object:
@@ -508,18 +508,11 @@ def __init__(self,
             The numpy arrays' first axes vary the structure, while the second axis and third axis, if it exists, 
             hold the (appropriately masked) indicator functions for each residue or pair of residues
         """
-        burial_low_density_indicators = []
-        burial_medium_density_indicators = []   
-        burial_high_density_indicators = []
-        contact_direct_indicators = []
-        contact_protein_indicators = []
-        contact_water_indicators = []
+        burial_indicators = []
+        direct_indicators = []
+        protein_indicators = []
+        water_indicators = []
         electrostatics_indicators = []
-        burial_indicators_other = []
-        direct_indicators_other = []
-        protein_indicators_other = []
-        water_indicators_other = []
-        electrostatics_indicators_other = []
         # the AWSEM class takes care of the indicator calculation (including masking) for us
         #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
         #     However, we do need to pass through parameters used to generate the indicator functions
@@ -527,40 +520,27 @@ def __init__(self,
         for pdb_structure in pdb_structures: 
             awsem_obj.pdb_structure = pdb_structure # we can use the pdb_structure setter to update structural 
                                                     # stuff without fully re-initializing the object
-            all_indicators = awsem_obj.indicators
-            burial_low_density_indicators.append(all_indicators[0])
-            burial_medium_density_indicators.append(all_indicators[1]) 
-            burial_high_density_indicators.append(all_indicators[2])
-            contact_direct_indicators.append(all_indicators[3])
-            contact_protein_indicators.append(all_indicators[4])
-            contact_water_indicators.append(all_indicators[5])
-            electrostatics_indicators.append(all_indicators[6])
-            burial_indicators_other.append(awsem_obj.electrostatics_indicator)
-            direct_indicators_other.append(awsem_obj.electrostatics_indicator)
-            protein_indicators_other.append(awsem_obj.electrostatics_indicator)
-            water_indicators_other.append(awsem_obj.electrostatics_indicator)
-            electrostatics_indicators_other.append(awsem_obj.electrostatics_indicator)
-        # the idea here is that we have different conformers of a chain of a particular length;
-        #     if not, we'll get a ValueError when trying to initialize numpy arrays from a ragged set of lists
-        self.indicators = [np.array(burial_low_density_indicators),
-                           np.array(burial_medium_density_indicators),
-                           np.array(contact_direct_indicators),
-                           np.array(contact_protein_indicators),
-                           np.array(contact_water_indicators),
-                           np.array(electrostatics_indicators),]
-        self.burial_indicator = np.average(np.array(burial_indicators_other),axis=0)
-        self.direct_indicator = np.average(np.array(direct_indicators_other),axis=0)
-        self.protein_indicator = np.average(np.array(protein_indicators_other),axis=0)
-        self.water_indicator = np.average(np.array(water_indicators_other),axis=0)
-        self.electrostatics_indicator = np.average(np.array(electrostatics_indicators_other),axis=0)
-        # Although a DecoyEnsemble does not have an energy or frustration in the same sense as an AWSEM,
-        #     it is helpful to attach gamma parameters to the DecoyEnsemble so that DecoyEnergyAverage, etc.
-        #     can read them through class inheritance in the same way that AwsemEnergyAverage, etc. read the gammas
-        #     from the AWSEM class.
-        # We take the gammas directly from the AWSEM that we used to get our indicators.
-        #     Technically, we generated one AWSEM for each pdb_structure, but the parameters are identical,
-        #     so we can for convenience read the gammas from the most recently initialized awsem_obj
-        self.gamma_array = awsem_obj.gamma_array
+            burial_indicators.append(awsem_obj.burial_indicator)
+            direct_indicators.append(awsem_obj.direct_indicator)
+            protein_indicators.append(awsem_obj.protein_indicator)
+            water_indicators.append(awsem_obj.water_indicator)
+            if hasattr(awsem_obj, 'electrostatics_indicator'):
+                electrostatics_indicators.append(awsem_obj.electrostatics_indicator)
+        # Stack and average indicators, ensuring correct shape for calculate_energy_and_potts
+        self.burial_indicator = np.mean(np.stack(burial_indicators, axis=0), axis=0)  # (N, 3)
+        self.direct_indicator = np.mean(np.stack(direct_indicators, axis=0), axis=0)  # (N, N, 1, 1)
+        self.protein_indicator = np.mean(np.stack(protein_indicators, axis=0), axis=0)  # (N, N, 1, 1)
+        self.water_indicator = np.mean(np.stack(water_indicators, axis=0), axis=0)  # (N, N, 1, 1)
+        if electrostatics_indicators:
+            self.electrostatics_indicator = np.mean(np.stack(electrostatics_indicators, axis=0), axis=0)  # (N, N)
+        else:
+            self.electrostatics_indicator = None
+        # Attach gamma parameters from the AWSEM object
+        self.burial_gamma = awsem_obj.burial_gamma
+        self.direct_gamma = awsem_obj.direct_gamma
+        self.protein_gamma = awsem_obj.protein_gamma
+        self.water_gamma = awsem_obj.water_gamma
+        self.electrostatics_gamma = getattr(awsem_obj, 'electrostatics_gamma', None)
 
     def average(self):
-        return [np.average(indicator, axis=0) for indicator in self.indicators], self.burial_indicator, self.direct_indicator, self.protein_indicator, self.water_indicator
\ No newline at end of file
+        return self.burial_indicator, self.direct_indicator, self.protein_indicator, self.water_indicator, self.electrostatics_indicator
\ No newline at end of file
diff --git a/frustratometer/classes/__init__.py b/frustratometer/classes/__init__.py
index 4ff9b4b..fd8b79b 100644
--- a/frustratometer/classes/__init__.py
+++ b/frustratometer/classes/__init__.py
@@ -7,7 +7,7 @@
 """
 
 from .DCA import DCA
-from .AWSEM import AWSEM, DecoyEnsemble
+from .AWSEM import AWSEM, AWSEMIndicators, DecoyEnsemble
 from .Structure import Structure
 from .Map import Map
 from .Gamma import Gamma
diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index bc2abe2..34f93c7 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -1277,9 +1277,10 @@ def find_optimal_replicas(self, max_replicas=32, n_repeats=5, n_steps=10000):
     pdb_list = ["tests/data/1r69.pdb","tests/data/1r69.pdb","tests/data/1r69.pdb"]
     pdb_structures = (Structure(pdb, chain=None) for pdb in pdb_list)
     ensemble = DecoyEnsemble(pdb_structures,  distance_cutoff_contact=10, min_sequence_separation_contact=10)
-    indicators, burial_indicators, direct_indicators, protein_indicators, water_indicators = ensemble.average()
-    average = AWSEMIndicators(indicators, burial_indicators, direct_indicators, protein_indicators, water_indicators,"SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLNGT")
-
+    burial_indicators, direct_indicators, protein_indicators, water_indicators, electrostatics_indicators = ensemble.average()
+    average = AWSEMIndicators(burial_indicators, direct_indicators, protein_indicators, water_indicators, electrostatics_indicators,
+                                  "SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLNGT")
+    
     reduced_alphabet = 'ADEFGHIKLMNQRSTVWY'
 
     awsem_energy = AwsemEnergy(average, alphabet=reduced_alphabet)

From d3386c9dee09cf4be56c0cbcaae44f4e6ef7bfbb Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Thu, 10 Jul 2025 17:18:38 -0500
Subject: [PATCH 11/20] got rid of unused classes

---
 frustratometer/optimization/optimization.py | 164 --------------------
 1 file changed, 164 deletions(-)

diff --git a/frustratometer/optimization/optimization.py b/frustratometer/optimization/optimization.py
index 34f93c7..416f195 100644
--- a/frustratometer/optimization/optimization.py
+++ b/frustratometer/optimization/optimization.py
@@ -512,170 +512,6 @@ def regression_test(self, seq_index):
         energy=self.compute_energy(seq_index)
         assert np.isclose(energy,expected_energy), f"Expected energy {expected_energy} but got {energy}"
 
-class EnsembleEnergyStatistics(EnergyTerm):
-    """
-    An abstract class (but not using the ABC framework) for ensemble statistics that are distributive in the gammas,
-    meaning the quantity that we want to calculate, (<gammas * indicators>_decoys), is equal to (gammas * <indicators>_decoys).
-    This allows us to average the indicator functions over all decoys just once, and then conduct the MC search as we would for 
-    a single sequence. 
-    
-    When this class was initially written, the intended use cases are for EnsembleEnergyAverage and EnsembleEnergyStd. 
-    These classes just need to call the EnsembleEnergyStatistics __init__ and define the collapse_indicators function.
-    """
-    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
-        #
-        # boilerplate from Awsem*Average classes
-        if not type(model)==DecoyEnsemble:
-            raise TypeError("EnsembleEnergyAverage may only be initialized from a DecoyEnsemble")
-        self.model=model
-        self._use_numba=use_numba
-        self.alphabet=alphabet
-        self.reindex_dca=[_AA.index(aa) for aa in alphabet]
-        self.alphabet_size=len(alphabet)
-        #TODO: Fix the gamma matrix to account for elecrostatics-- i think this is already done!
-        self.gamma = np.concatenate([(a[self.reindex_dca].ravel() if len(a.shape)==1 else a[self.reindex_dca][:,self.reindex_dca].ravel()) for a in model.gamma_array])
-        #
-        # collapse over decoys (axis 0 of our indicator arrays)
-        self.indicators = self.collapse_indicators(model.indicators)
-
-        self.initialize_functions()
-    
-    def initialize_functions(self):
-        len_alphabet=self.alphabet_size
-        gamma=self.gamma
-        
-        # Precompute the mean of the indicators
-        means = [np.average(indicator_type, axis=-1) for indicator_type in self.indicators]
-        
-        def compute_energy(seq_index):
-            counts = np.zeros(len_alphabet, dtype=np.int64)
-            for val in seq_index:
-                counts[val] += 1
-            
-            # Calculate phi_mean
-            phi_mean = np.zeros(len_alphabet*len_indicators1D + len_alphabet**2*len_indicators2D)
-            
-            # 1D indicators
-            c=0
-            for i in range(len_indicators1D):
-                for j in range(len_alphabet):
-                    phi_mean[c] = indicator_means[i] * counts[j]
-                    c += 1
-
-            # 2D indicators
-            for i in range(len_indicators2D):
-                for j in range(len_alphabet):
-                    for k in range(len_alphabet):
-                        t=1 if j==k else 0
-                        phi_mean[c] = indicator_means[i+ len_indicators1D] * counts[j] * (counts[k] - t)
-                        c += 1
-
-            # Calculate energy
-            energy = 0
-            for i in range(phi_len):
-                energy += gamma[i] * phi_mean[i]
-            
-            return energy
-        
-        def denergy_mutation(seq_index, pos, aa):
-            counts = np.zeros(len_alphabet, dtype=np.int64)
-            for val in seq_index:
-                counts[val] += 1
-            aa_old=seq_index[pos]
-            if aa_old==aa:
-                return 0.
-            
-            # Calculate phi_mean
-
-            dphi_mean = np.zeros(len_alphabet*len_indicators1D + len_alphabet**2*len_indicators2D)
-            
-            # 1D indicators
-            for i in range(len_indicators1D):
-                dphi_mean[i*len_alphabet + aa_old] -= indicator_means[i]
-                dphi_mean[i*len_alphabet + aa] += indicator_means[i]
-
-            offset = len_alphabet*len_indicators1D
-            for i in range(len_indicators2D):
-                for j in range(len_alphabet):
-                    k=aa_old
-                    if j==k:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] -= 2 * indicator_means[i + len_indicators1D] *  (counts[j]-1)
-                    elif j==aa:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i+ len_indicators1D] * (counts[k] - counts[j] -1)
-                    else:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] -= indicator_means[i + len_indicators1D] * counts[j]
-                        dphi_mean[offset + i*len_alphabet**2 + k*len_alphabet + j] -= indicator_means[i + len_indicators1D] * counts[j]                    
-                    k=aa
-                    if j==k:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += 2 * indicator_means[i + len_indicators1D] *  counts[j]
-                    elif j==aa_old:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i+ len_indicators1D] * (counts[j] - counts[k] -1)
-                    else:
-                        dphi_mean[offset + i*len_alphabet**2 + j*len_alphabet + k] += indicator_means[i + len_indicators1D] * counts[j]
-                        dphi_mean[offset + i*len_alphabet**2 + k*len_alphabet + j] += indicator_means[i + len_indicators1D] * counts[j]
-            
-            # Calculate energy
-            denergy = 0
-            for i in range(phi_len):
-                denergy += gamma[i] * dphi_mean[i]
-            
-            return denergy
-        
-        self.compute_energy = compute_energy
-        self.compute_denergy_mutation = denergy_mutation
-
-        awsem_energy = AwsemEnergy(use_numba=self.use_numba, model=self.model, alphabet=self.alphabet).energy_function
-
-        def compute_energy_sample(seq_index,n_decoys=100000):
-            """ Function to compute the variance of the energy of permutations of a sequence using random shuffling.
-                This function is much faster than compute_energy_permutation but is an approximation"""
-            energies=np.zeros(n_decoys)
-            shuffled_index=seq_index.copy()
-            for i in numba.prange(n_decoys):
-                energies[i]=awsem_energy(shuffled_index[np.random.permutation(len(shuffled_index))])
-            return np.mean(energies)
-
-        def compute_energy_permutation(seq_index):
-            """ Function to compute the variance of the energy of all permutations of a sequence 
-                Caution: This function is very slow for normal sequences """
-            from itertools import permutations
-            decoy_sequences = np.array(list(permutations(seq_index)))
-            energies=np.zeros(len(decoy_sequences))
-            for i in numba.prange(len(decoy_sequences)):
-                energies[i]=awsem_energy(decoy_sequences[i])
-            return np.mean(energies)
-
-        def collapse_indicators(uncollapsed_indicators):
-            raise NotImplementedError("EnsembleEnergyStatistics is an abstract class. Use a subclass implementing this function.")
-        
-        self.compute_energy_sample=self.numbify(compute_energy_sample,parallel=True)
-        self.compute_energy_permutation=compute_energy_permutation
-
-    def regression_test(self, seq_index):
-        expected_energy=self.compute_energy_permutation(seq_index)
-        energy=self.compute_energy(seq_index)
-        assert np.isclose(energy,expected_energy), f"Expected energy {expected_energy} but got {energy}"
-
-class EnsembleEnergyAverage(EnsembleEnergyStatistics):
-    """
-    See documentation for EnsembleEnergyStatistics
-    """
-    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
-        super().__init__(model, use_numba, alphabet)
-
-    def collapse_indicators(uncollapsed_indicators):
-        self.indicators = [np.average(indicator, axis=0) for indicator in uncollapsed_indicators]
-
-class EnsembleEnergyStd(EnsembleEnergyStatistics):
-    """
-    See documentation for EnsembleEnergyStatistics
-    """
-    def __init__(self, model:DecoyEnsemble, use_numba=True, alphabet=_AA):
-        super().__init__(model, use_numba, alphabet)
-
-    def collapse_indicators(uncollapsed_indicators):
-        self.indicators = [np.std(indicator, axis=0) for indicator in uncollapsed_indicators]
-
 
 class AwsemEnergyVariance(EnergyTerm):   
     def __init__(self, model:Frustratometer, use_numba=True, alphabet=_AA):

From 88834d3c174edc1c040e17b60ab964b3523b6d35 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Thu, 10 Jul 2025 17:27:29 -0500
Subject: [PATCH 12/20] updated documentation

---
 frustratometer/classes/AWSEM.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 00358d0..c58cb1a 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -444,11 +444,11 @@ def compute_configurational_energies(self):
 class AWSEMIndicators(AWSEMBase):
 
     def __init__(self, 
-                 burial_indicator,
-                 direct_indicator,
-                 protein_indicator,
-                 water_indicator,
-                 electrostatics_indicator,
+                 burial_indicator: np.ndarray,
+                 direct_indicator: np.ndarray,
+                 protein_indicator: np.ndarray,
+                 water_indicator: np.ndarray,
+                 electrostatics_indicator: Union[np.ndarray, None],
                  sequence: str,          # sequence is optional if we initialize from a Structure but not here
                  expose_indicator_functions: bool=False,
                  **parameters)->object:
@@ -457,8 +457,17 @@ def __init__(self,
 
         Parameters
         ----------
-        indicators : list
-            List of numpy.ndarray holding the indicator functions, with different decoys stacked along axis 0
+        burial_indicator : np.ndarray
+            Burial indicator array, most likely accessed using the burial_indicator attribute of an AWSEM
+        direct_indicator : np.ndarray
+            Direct indicator array, most likely accessed using the direct_indicator attribute of an AWSEM
+        protein_indicator : np.ndarray
+            Protein indicator array, most likely accessed using the protein_indicator attribute of an AWSEM
+        water_indicator : np.ndarray
+            Water indicator array, most likely accessed using the water_indicator attribute of an AWSEM
+        electrostatics_indicator : Union[np.ndarray, None]
+            Electrostatics indicator array, most likely accessed using the electrostatics_indicator attribute of an AWSEM.
+            May be None is electrostatics were turned off (k_electrostatics=0).
         sequence :  str
             The amino acid sequence of the protein. The sequence is assumed to be in one-letter code. 
         expose_indicator_functions: bool
@@ -503,10 +512,7 @@ def __init__(self,
         
         Returns
         -------
-        DecoyEnsemble object, which holds a list of 7 numpy arrays, ordered by indicator function type:
-            [low density (rho), medium density (rho), high density (rho), direct, protein, water, electrostatics].
-            The numpy arrays' first axes vary the structure, while the second axis and third axis, if it exists, 
-            hold the (appropriately masked) indicator functions for each residue or pair of residues
+        DecoyEnsemble object, which holds indicator arrays and gammas computed by the AWSEM class.
         """
         burial_indicators = []
         direct_indicators = []

From 2a88e03a9be80aaa4d98bc8f0664df73b02681a2 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 10:08:31 -0500
Subject: [PATCH 13/20] refactored DecoyEnsemble to minimize memory usage and
 implemented avg/std classes

---
 frustratometer/classes/AWSEM.py | 168 ++++++++++++++++++++++++++------
 1 file changed, 136 insertions(+), 32 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index c58cb1a..bf90fa8 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -178,9 +178,6 @@ def configurational_frustration(self,aa_freq=None, correction=0, n_decoys=4000):
         return -(self.compute_configurational_energies()-mean_decoy_energy)/(std_decoy_energy+correction)
 
 
-
-
-
 class AWSEM(AWSEMBase):
 
     def __init__(self, 
@@ -223,7 +220,7 @@ def __init__(self,
     def setup_structure(self, pdb_structure):
         # check structure
         selection_CB = pdb_structure.structure.select('name CB or (resname GLY IGL and name CA)')
-        resid = selection_CB.getResindices()
+        resid = list(set(selection_CB.getResindices())) # sometimes, it decides to split a single residue in 2 
         N=len(resid)
         self.resid = resid
         self.N = N
@@ -246,6 +243,7 @@ def pdb_structure(self,pdb_structure):
         self.setup_structure(pdb_structure)
         # check that our new structure is compatible with our old one
         if self.N != len(self.sequence):
+            import pdb; pdb.set_trace()
             raise ValueError("The pdb is incomplete. Try setting 'repair_pdb=True' when constructing the Structure object.")
         self.calculate_indicators()
     def change_conformation(alternative_pdb_structure):
@@ -512,41 +510,147 @@ def __init__(self,
         
         Returns
         -------
-        DecoyEnsemble object, which holds indicator arrays and gammas computed by the AWSEM class.
+        DecoyEnsemble object, which holds indicator arrays (and gammas???) computed by the AWSEM class.
         """
-        burial_indicators = []
-        direct_indicators = []
-        protein_indicators = []
-        water_indicators = []
-        electrostatics_indicators = []
         # the AWSEM class takes care of the indicator calculation (including masking) for us
         #     AWSEM normally accepts an amino acid sequence argument, but we don't need that here
         #     However, we do need to pass through parameters used to generate the indicator functions
-        awsem_obj = AWSEM(next(pdb_structures), expose_indicator_functions=True, **parameters)
+        awsem_obj = AWSEM(next(pdb_structures), expose_indicator_functions=True, repair_pdb=True, **parameters)
         for pdb_structure in pdb_structures: 
             awsem_obj.pdb_structure = pdb_structure # we can use the pdb_structure setter to update structural 
                                                     # stuff without fully re-initializing the object
-            burial_indicators.append(awsem_obj.burial_indicator)
-            direct_indicators.append(awsem_obj.direct_indicator)
-            protein_indicators.append(awsem_obj.protein_indicator)
-            water_indicators.append(awsem_obj.water_indicator)
-            if hasattr(awsem_obj, 'electrostatics_indicator'):
-                electrostatics_indicators.append(awsem_obj.electrostatics_indicator)
+            with open('burial_indicators.npy','ab') as f:
+                np.save(f,awsem_obj.burial_indicator)
+            with open('direct_indicators.npy','ab') as f:
+                np.save(f,awsem_obj.direct_indicator)
+            with open('protein_indicators.npy','ab') as f:
+                np.save(f,awsem_obj.protein_indicator)
+            with open('water_indicators.npy','ab') as f:
+                np.save(f,awsem_obj.water_indicator)
+            with open('electrostatics_indicators.npy','ab') as f:
+                if hasattr(awsem_obj, 'electrostatics_indicator'):
+                    np.save(f,awsem_obj.electrostatics_indicator)
+                else:
+                    np.save(f,None)
         # Stack and average indicators, ensuring correct shape for calculate_energy_and_potts
-        self.burial_indicator = np.mean(np.stack(burial_indicators, axis=0), axis=0)  # (N, 3)
-        self.direct_indicator = np.mean(np.stack(direct_indicators, axis=0), axis=0)  # (N, N, 1, 1)
-        self.protein_indicator = np.mean(np.stack(protein_indicators, axis=0), axis=0)  # (N, N, 1, 1)
-        self.water_indicator = np.mean(np.stack(water_indicators, axis=0), axis=0)  # (N, N, 1, 1)
-        if electrostatics_indicators:
-            self.electrostatics_indicator = np.mean(np.stack(electrostatics_indicators, axis=0), axis=0)  # (N, N)
-        else:
-            self.electrostatics_indicator = None
-        # Attach gamma parameters from the AWSEM object
-        self.burial_gamma = awsem_obj.burial_gamma
-        self.direct_gamma = awsem_obj.direct_gamma
-        self.protein_gamma = awsem_obj.protein_gamma
-        self.water_gamma = awsem_obj.water_gamma
-        self.electrostatics_gamma = getattr(awsem_obj, 'electrostatics_gamma', None)
+        self.burial_indicators = self.get_indicators("burial_indicators.npy")
+        self.direct_indicators = self.get_indicators("direct_indicators.npy")
+        self.protein_indicators = self.get_indicators("protein_indicators.npy")
+        self.water_indicators = self.get_indicators("water_indicators.npy")
+        self.electrostatics_indicators = self.get_indicators("electrostatics_indicators.npy")
+
+        # averages are needed to compute standard deviation, so it's useful to have them as attributes
+        self.avg_burial = None
+        self.avg_direct = None
+        self.avg_prot = None
+        self.avg_wat = None
+        self.avg_elect = None
+
+        ## Attach gamma parameters from the AWSEM object
+        ## Kind of off-topic from my current use of this class
+        #self.burial_gamma = awsem_obj.burial_gamma
+        #self.direct_gamma = awsem_obj.direct_gamma
+        #self.protein_gamma = awsem_obj.protein_gamma
+        #self.water_gamma = awsem_obj.water_gamma
+        #self.electrostatics_gamma = getattr(awsem_obj, 'electrostatics_gamma', None)
+        
+        # this might help with memory
+        del awsem_obj
+ 
+    def get_indicators(self, filename):
+        # expecting a numpy file
+        with open(filename, 'rb') as f:
+            while True:
+                try:
+                    yield np.load(f, allow_pickle=True) # needed to load None if not electrostatics
+                except EOFError:
+                    break
 
     def average(self):
-        return self.burial_indicator, self.direct_indicator, self.protein_indicator, self.water_indicator, self.electrostatics_indicator
\ No newline at end of file
+        # average burial computation from generator
+        avg_burial = 0
+        counter = 0
+        for array in self.burial_indicators:
+            counter += 1
+            burial_indicator += array
+        avg_burial /= counter
+        self.avg_burial = avg_burial
+        # average direct computation from generator
+        avg_direct = 0
+        counter = 0
+        for array in self.direct_indicators:
+            counter += 1
+            direct_indicator += array
+        avg_direct /= counter
+        self.avg_direct = avg_direct
+        # average prot computation from generator
+        avg_prot = 0
+        counter = 0
+        for array in self.protein_indicators:
+            counter += 1
+            protein_indicator += array
+        avg_prot /= counter
+        self.avg_prot = avg_prot
+        # average wat computation from generator
+        avg_wat = 0
+        counter = 0
+        for array in self.water_indicators:
+            counter += 1
+            water_indicator += array
+        avg_wat /= counter
+        self.avg_wat = avg_wat
+        # average elec computation from generator
+        #     if not defined, set to zero, which will have no impact
+        if self.electrostatics_indicators == None:
+            avg_elec = 0
+        else:
+            avg_elec = 0
+            counter = 0
+            for array in self.electrostatics_indicators:
+                counter += 1
+                avg_elec += array
+            avg_elec /= counter
+        self.avg_elec = avg_elec
+        return self.avg_burial, self.avg_direct, self.avg_prot, self.avg_wat, self.avg_elec
+
+    def std(self):
+        if self.avg_burial is None or self.avg_direct is None or \
+           self.avg_prot is None or self.avg_wat is None or \
+           self.avg_elec is None:
+            self.average()  # compute averages if not already done
+        # std burial computation from generator and previously computed average
+        std_burial = 0
+        counter = 0
+        for array in self.burial_indicators:
+            counter += 1
+            std_burial += (array - self.avg_burial) ** 2
+        std_burial = np.sqrt(std_burial / counter)
+        # std direct computation from generator and previously computed average
+        std_direct = 0
+        counter = 0
+        for array in self.direct_indicators:
+            counter += 1
+            std_direct += (array - self.avg_direct) ** 2
+        std_direct = np.sqrt(std_direct / counter)
+        # std prot computation from generator and previously computed average
+        std_prot = 0
+        counter = 0
+        for array in self.protein_indicators:
+            counter += 1
+            std_prot += (array - self.avg_prot) ** 2
+        std_prot = np.sqrt(std_prot / counter)
+        # std wat computation from generator and previously computed average
+        std_wat = 0
+        counter = 0
+        for array in self.water_indicators:
+            counter += 1
+            std_wat += (array - self.avg_wat) ** 2
+        std_wat = np.sqrt(std_wat / counter)
+        # std elec computation from generator and previously computed average
+        std_elec = 0
+        counter = 0
+        for array in self.electrostatics_indicators:
+            counter += 1
+            std_elec += (array - self.avg_elec) ** 2
+        std_elec = np.sqrt(std_elec / counter)
+        return std_burial, std_direct, std_prot, std_wat, std_elec
\ No newline at end of file

From 790adce77afd970a7ef1315032f5283c299d5907 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 11:36:03 -0500
Subject: [PATCH 14/20] got pdb.py from my amyloid_atlas branch, which allows
 prody for loading the structures. this probably wasn't necessary

---
 frustratometer/pdb/pdb.py | 87 +++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/frustratometer/pdb/pdb.py b/frustratometer/pdb/pdb.py
index 853e6e6..2feeb65 100644
--- a/frustratometer/pdb/pdb.py
+++ b/frustratometer/pdb/pdb.py
@@ -37,7 +37,8 @@ def download(pdbID: str,directory: Union[Path,str]=Path.cwd()) -> Path:
     return pdb_file
 
 def get_sequence(pdb_file: str, 
-                 chain: str
+                 chain: str,
+                 return_start_mask: bool=False
                  ) -> str:
     """
     Get a protein sequence from a pdb file
@@ -46,8 +47,10 @@ def get_sequence(pdb_file: str,
     ----------
     pdb_file : str,
         PDB file location.
-    chain: str,
-        Chain ID of the selected protein.
+    chain: str or list,
+        Chain ID(s) of the selected protein.
+    return_start_mask: bool,
+        Return binary mask list indicating whether each position is the start of a chain
 
     Returns
     -------
@@ -58,37 +61,63 @@ def get_sequence(pdb_file: str,
     Get a protein sequence from a PDB file
     
     :param pdb: PDB file location
-    :param chain: chain name of PDB file to get sequence
+    :param chain: chain name(s) of PDB file to get sequence
     :return: protein sequence
     """
 
-    if ".cif" in str(pdb_file):
-        parser = MMCIFParser()
-    else:
-        parser = PDBParser()
-    structure = parser.get_structure('name', pdb_file)
+    if ".cif" in str(pdb_file): # BIOPYTHON
+        parser = MMCIFParser() # BIOPYTHON
+    else:                     # BIOPYTHON
+        parser = PDBParser() # BIOPYTHON
+    structure = parser.get_structure('name', pdb_file) #BIOPYTHON
+    #structure = prody.parsePDB(str(pdb_file)) # PRODY
+    #hv = structure.getHierView() # PRODY
     if chain==None:
-        all_chains=[i.get_id() for i in structure.get_chains()]
+        all_chains=[i.get_id() for i in structure.get_chains()] # BIOPYTHON
+        #all_chains = [structure_chain.getChid() for structure_chain in hv] # PRODY
     else:
-        all_chains=[chain]
+        if type(chain) == list:
+            all_chains = chain
+        elif type(chain) == str:
+            all_chains = [id for id in chain if id != " "] # remove spaces if present in string
+        else:
+            raise TypeError(f"chain must be list or str but was {type(chain)}")
     sequence = ""
-    for chain in all_chains:
-        c = structure[0][chain]
+    start_mask = []
+    for single_chain in all_chains:
+        c = structure[0][single_chain] # BIOPYTHON
+        #c = hv[single_chain] # PRODY
         chain_seq = ""
         for residue in c:
-            is_regular_res = residue.has_id('CA') and residue.has_id('O')
-            res_id = residue.get_id()[0]
-            if (res_id==' ' or res_id=='H_MSE' or res_id=='H_M3L' or res_id=='H_CAS') and is_regular_res:
-                residue_name = residue.get_resname()
+            is_regular_res = residue.has_id('CA') and residue.has_id('O') # BIOPYTHON
+            #atom_names = [atom.getName() for atom in residue] # PRODY
+            #is_regular_res = ("CA" in atom_names and "O" in atom_names) # PRODY
+            res_id = residue.get_id()[0] #BIOPYTHON
+            if (res_id==' ' or res_id=='H_MSE' or res_id=='H_M3L' or res_id=='H_CAS') and is_regular_res: # BIOPYTHON
+            # i don't know what H_HSE, H_M3L, and H_CAS are doing 
+            # because they aren't in three_to_one, so those should throw an error
+            # long story short, I don't think we have to worry about them when switching from biopython to prody
+            #if is_regular_res: # PRODY
+                residue_name = residue.get_resname() # BIOPYTHON
+                #residue_name = residue.getResname() # PRODY
                 chain_seq += three_to_one[residue_name]
+        if chain_seq == "": # empty chain, like a nucleic acid chain (see 8ZWK)
+            continue        # FYI, currently, a non-empty chain with certain invalid residues will throw an error at the three_to_one[residue_name] above
         sequence += chain_seq
-    return sequence
+        start_mask.append(1)
+        for _ in range(1,len(chain_seq)):
+            start_mask.append(0)
+    if return_start_mask:
+        return (sequence,start_mask)
+    else:
+        return sequence
 
 
 
 def get_distance_matrix(pdb_file: Union[Path,str],
                         chain: str,
-                        method: str = 'CB'
+                        method: str = 'CB',
+                        return_distance_midpoints: bool = False,
                         ) -> np.array:
     """
     Calculate the distance matrix of the specified atoms in a PDB file.
@@ -106,6 +135,10 @@ def get_distance_matrix(pdb_file: Union[Path,str],
             'CA' for using only the CA atom,
             'minimum' for using the minimum distance between all atoms in each residue,
             'CB_force' computes a new coordinate for the CB atom based on the CA, C, and N atoms and uses CB distance even for glycine.
+    return_distance_midpoints: bool
+        Whether to return a matrix of the same shape as distance_matrix representing the same contacts as distance_matrix
+        that indicates the absolute coordinates of the midpoint between the pair of atoms. This helps us compute the pair distribution
+        functions of the different classes of contacts. So this matrix isn't really a matrix because each "element" has 3 channels: x, y, and z
 
     Returns:
         np.array: The distance matrix of the selected atoms.
@@ -121,7 +154,7 @@ def get_distance_matrix(pdb_file: Union[Path,str],
     if method == 'CA':
         coords = structure.select('protein and name CA' + chain_selection).getCoords()
     elif method == 'CB':
-        coords = structure.select('(protein and (name CB) or (resname GLY and name CA))' + chain_selection).getCoords()
+        coords = structure.select('(protein and (name CB) or (resname GLY IGL and name CA))' + chain_selection).getCoords()
     elif method == 'minimum':
         selection = structure.select('protein' + chain_selection)
         coords = selection.getCoords()
@@ -163,8 +196,20 @@ def get_distance_matrix(pdb_file: Union[Path,str],
     if len(coords) == 0:
         raise IndexError('Empty selection for distance map')
 
+    # coords should be a numpy array of shape (N,3)
     distance_matrix = sdist.squareform(sdist.pdist(coords))
-    return distance_matrix
+    assert distance_matrix.shape[0] == distance_matrix.shape[1]
+    if return_distance_midpoints:
+        midpoint_matrix = np.zeros((distance_matrix.shape[0],distance_matrix.shape[1],3))
+        for i in range(distance_matrix.shape[0]):
+            for j in range(distance_matrix.shape[1]):
+                midpoint_matrix[i,j,:] = (coords[None,i,:] + coords[None,j,:])/2
+                # check that indexing is consistent with distance_matrix
+                assert np.allclose(np.linalg.norm(coords[i,:]-coords[j,:]),distance_matrix[i,j])
+        assert np.allclose(midpoint_matrix,midpoint_matrix.transpose(1,0,2)) # check symmetry
+        return distance_matrix, midpoint_matrix
+    else:    
+        return distance_matrix
 
 
 def full_to_filtered_aligned_mapping(aligned_sequence: str,

From 6068d3c152a2786ffec549c138a6a8c7f54fa8cb Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 11:36:46 -0500
Subject: [PATCH 15/20] undid unnecessary modification to structure processing
 in AWSEM

---
 frustratometer/classes/AWSEM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index bf90fa8..3817658 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -220,7 +220,7 @@ def __init__(self,
     def setup_structure(self, pdb_structure):
         # check structure
         selection_CB = pdb_structure.structure.select('name CB or (resname GLY IGL and name CA)')
-        resid = list(set(selection_CB.getResindices())) # sometimes, it decides to split a single residue in 2 
+        resid = selection_CB.getResindices()
         N=len(resid)
         self.resid = resid
         self.N = N

From 6d5586b09070472f895d687fe671f985e79e671a Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 11:37:22 -0500
Subject: [PATCH 16/20] changed name of DecoyEnsemble.average to
 DecoyEnsemble.avg

---
 frustratometer/classes/AWSEM.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 3817658..5ba8e17 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -566,7 +566,7 @@ def get_indicators(self, filename):
                 except EOFError:
                     break
 
-    def average(self):
+    def avg(self):
         # average burial computation from generator
         avg_burial = 0
         counter = 0
@@ -617,7 +617,7 @@ def std(self):
         if self.avg_burial is None or self.avg_direct is None or \
            self.avg_prot is None or self.avg_wat is None or \
            self.avg_elec is None:
-            self.average()  # compute averages if not already done
+            self.avg()  # compute averages if not already done
         # std burial computation from generator and previously computed average
         std_burial = 0
         counter = 0

From 1c1a5effa9e55eb816eda23be8f75bb2ff76db16 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 12:59:00 -0500
Subject: [PATCH 17/20] fix wrong variable used

---
 frustratometer/classes/AWSEM.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 5ba8e17..5691498 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -566,13 +566,14 @@ def get_indicators(self, filename):
                 except EOFError:
                     break
 
+    # average indicator functions over all decoys
     def avg(self):
         # average burial computation from generator
         avg_burial = 0
         counter = 0
         for array in self.burial_indicators:
             counter += 1
-            burial_indicator += array
+            avg_burial += array
         avg_burial /= counter
         self.avg_burial = avg_burial
         # average direct computation from generator
@@ -580,7 +581,7 @@ def avg(self):
         counter = 0
         for array in self.direct_indicators:
             counter += 1
-            direct_indicator += array
+            avg_direct += array
         avg_direct /= counter
         self.avg_direct = avg_direct
         # average prot computation from generator
@@ -588,7 +589,7 @@ def avg(self):
         counter = 0
         for array in self.protein_indicators:
             counter += 1
-            protein_indicator += array
+            avg_prot += array
         avg_prot /= counter
         self.avg_prot = avg_prot
         # average wat computation from generator
@@ -596,7 +597,7 @@ def avg(self):
         counter = 0
         for array in self.water_indicators:
             counter += 1
-            water_indicator += array
+            avg_wat += array
         avg_wat /= counter
         self.avg_wat = avg_wat
         # average elec computation from generator
@@ -613,6 +614,7 @@ def avg(self):
         self.avg_elec = avg_elec
         return self.avg_burial, self.avg_direct, self.avg_prot, self.avg_wat, self.avg_elec
 
+    # standard deviation of indicator functions over all decoys
     def std(self):
         if self.avg_burial is None or self.avg_direct is None or \
            self.avg_prot is None or self.avg_wat is None or \

From 3b64349b2d571c745d0d7ee4f37e570995e5b00b Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 13:01:52 -0500
Subject: [PATCH 18/20] reinitialize indicator function generator each time
 attribute is accessed

---
 frustratometer/classes/AWSEM.py | 36 ++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 5691498..ca4ac5b 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -532,20 +532,15 @@ def __init__(self,
                     np.save(f,awsem_obj.electrostatics_indicator)
                 else:
                     np.save(f,None)
-        # Stack and average indicators, ensuring correct shape for calculate_energy_and_potts
-        self.burial_indicators = self.get_indicators("burial_indicators.npy")
-        self.direct_indicators = self.get_indicators("direct_indicators.npy")
-        self.protein_indicators = self.get_indicators("protein_indicators.npy")
-        self.water_indicators = self.get_indicators("water_indicators.npy")
-        self.electrostatics_indicators = self.get_indicators("electrostatics_indicators.npy")
-
-        # averages are needed to compute standard deviation, so it's useful to have them as attributes
+        # averages are needed to compute standard deviation
+        #     having these be attributes allows them to be easily passed
+        #     from the avg method to the std method
         self.avg_burial = None
         self.avg_direct = None
         self.avg_prot = None
         self.avg_wat = None
         self.avg_elect = None
-
+        ################################################
         ## Attach gamma parameters from the AWSEM object
         ## Kind of off-topic from my current use of this class
         #self.burial_gamma = awsem_obj.burial_gamma
@@ -553,10 +548,31 @@ def __init__(self,
         #self.protein_gamma = awsem_obj.protein_gamma
         #self.water_gamma = awsem_obj.water_gamma
         #self.electrostatics_gamma = getattr(awsem_obj, 'electrostatics_gamma', None)
-        
+        ################################################
         # this might help with memory
         del awsem_obj
  
+    # To manage memory, we need the indicator attributes to be generators (see self.get_indicators).
+    # But to ensure that we can iterate over them more than once, we need to 
+    #     be able to reinitialize the generators. We accomplish this with properties
+    @property
+    def burial_indicators(self):
+        return self.get_indicators("burial_indicators.npy")
+    @property
+    def direct_indicators(self):
+        return self.get_indicators("direct_indicators.npy")
+    @property
+    def protein_indicators(self):
+        return self.get_indicators("protein_indicators.npy")
+    @property
+    def water_indicators(self):
+        return self.get_indicators("water_indicators.npy")
+    @property
+    def electrostatics_indicators(self):
+        return self.get_indicators("electrostatics_indicators.npy")
+
+    # allows us to process indicators without holding them all in memory
+    #    this requires that every method that acts on the indicators iterates over them
     def get_indicators(self, filename):
         # expecting a numpy file
         with open(filename, 'rb') as f:

From 3a281d72044b9c2cd85f77c7ec6e337c43696296 Mon Sep 17 00:00:00 2001
From: Finley Clark <finn2001clark@gmail.com>
Date: Fri, 11 Jul 2025 18:31:43 -0500
Subject: [PATCH 19/20] take absolute value of gamma (sqrt(gamma**2)) for
 approximate variance calculation

---
 frustratometer/classes/AWSEM.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index ca4ac5b..4802135 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -449,6 +449,7 @@ def __init__(self,
                  electrostatics_indicator: Union[np.ndarray, None],
                  sequence: str,          # sequence is optional if we initialize from a Structure but not here
                  expose_indicator_functions: bool=False,
+                 absolute_value_gamma: bool=False,
                  **parameters)->object:
         """
         A stripped-down version of the AWSEM class that can be initialized from a set of indicator functions
@@ -470,7 +471,9 @@ def __init__(self,
             The amino acid sequence of the protein. The sequence is assumed to be in one-letter code. 
         expose_indicator_functions: bool
             If set to True, indicator functions of the contact and burial energy terms can be accessed by user.
-        
+        absolute_value_gamma: bool
+            If True, replace gammas with their absolute values. This is helpful for the standard deviation approximation
+
         Returns
         -------
         AWSEMIndicators object
@@ -484,6 +487,13 @@ def __init__(self,
         self.electrostatics_indicator = electrostatics_indicator
         self.sequence_mask_contact = np.full((self.N,self.N), True) 
         self.mask = np.full((self.N,self.N), True) 
+        if absolute_value_gamma:
+            self.burial_gamma = np.abs(self.burial_gamma)
+            self.direct_gamma = np.abs(self.direct_gamma)
+            self.protein_gamma = np.abs(self.protein_gamma)
+            self.water_gamma = np.abs(self.water_gamma)
+            self.electrostatics_gamma = np.abs(self.electrostatics_gamma)
+        self.absolute_value_gamma = absolute_value_gamma
         # mask should have been applied when calculating the indicator functions,
         #     so we set it such that no further masking is performed
         self.setup_model()

From 26795fe64a89cd81c40ed5d8f93f49cbf83d17b9 Mon Sep 17 00:00:00 2001
From: Finley Clark <fc36@rice.edu>
Date: Sat, 12 Jul 2025 16:00:42 -0500
Subject: [PATCH 20/20] i guess our sign convention caused taking the absolute
 value of the gammas to make all energies negative. so now, we take the
 negative of the absolute values of the gammas to make all energies positive

---
 frustratometer/classes/AWSEM.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/frustratometer/classes/AWSEM.py b/frustratometer/classes/AWSEM.py
index 4802135..5131fbd 100644
--- a/frustratometer/classes/AWSEM.py
+++ b/frustratometer/classes/AWSEM.py
@@ -488,11 +488,11 @@ def __init__(self,
         self.sequence_mask_contact = np.full((self.N,self.N), True) 
         self.mask = np.full((self.N,self.N), True) 
         if absolute_value_gamma:
-            self.burial_gamma = np.abs(self.burial_gamma)
-            self.direct_gamma = np.abs(self.direct_gamma)
-            self.protein_gamma = np.abs(self.protein_gamma)
-            self.water_gamma = np.abs(self.water_gamma)
-            self.electrostatics_gamma = np.abs(self.electrostatics_gamma)
+            self.burial_gamma = -np.abs(self.burial_gamma)
+            self.direct_gamma = -np.abs(self.direct_gamma)
+            self.protein_gamma = -np.abs(self.protein_gamma)
+            self.water_gamma = -np.abs(self.water_gamma)
+            self.electrostatics_gamma = -np.abs(self.electrostatics_gamma)
         self.absolute_value_gamma = absolute_value_gamma
         # mask should have been applied when calculating the indicator functions,
         #     so we set it such that no further masking is performed
@@ -681,4 +681,4 @@ def std(self):
             counter += 1
             std_elec += (array - self.avg_elec) ** 2
         std_elec = np.sqrt(std_elec / counter)
-        return std_burial, std_direct, std_prot, std_wat, std_elec
\ No newline at end of file
+        return std_burial, std_direct, std_prot, std_wat, std_elec