From 21e0a689933c06bffbe743ec4c33b9f09d594a6b Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Wed, 28 Feb 2024 18:21:58 -0500
Subject: [PATCH 01/11] Fix printing logging

---
 align_system/algorithms/llama_2_single_kdma_adm.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 22082fb4..c037da17 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -126,11 +126,11 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec
     def load_model(self, model=None, tokenizer=None):
         assert (model is None) == (tokenizer is None), "model and tokenizer must both be None or both be not None."
         if model is not None:
-            print('Loading model and tokenizer from provided objects.')
+            log.info('Loading model and tokenizer from provided objects.')
             self.model = model
             self.tokenizer = tokenizer
         else:
-            print('Loading model:', self.hf_model)
+            log.info('Loading model: %s', self.hf_model)
             if self.device == 'auto':
                 self.model = AutoModelForCausalLM.from_pretrained(self.hf_model, torch_dtype=self.precision, device_map='auto')
             else:
@@ -284,7 +284,7 @@ def respond_to_dialog(self, dialog, prefix=None):
                 else:
                     new_dialog.append(message)
             dialog = new_dialog
-            print('INPUT\n', dialog)
+            log.info('INPUT\n %s', dialog)
             prompt_tokens = [self.tokenizer.apply_chat_template(dialog, tokenize=True)]
             inference_pair['input'] = self.tokenizer.apply_chat_template(dialog, tokenize=False)
 
@@ -306,7 +306,7 @@ def respond_to_dialog(self, dialog, prefix=None):
             temperature=self.temperature,
             do_sample=self.do_sample)
 
-        # Print the generated model output
+        # log.info the generated model output
         generated_output = self.tokenizer.decode(outputs.sequences[0][prompt_length:])
         inference_pair['output'] = generated_output
 
@@ -428,7 +428,7 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam
             if not good_parse:
                 reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(high_response, shuffled_choices)
 
-            print('CHOSEN ANSWER IDX', answer_idx, shuffled_choices)
+            log.info('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices)
             assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}'
 
             responses.append({
@@ -600,10 +600,10 @@ def parse_generated_output(generated_output, n_choices):
 
     @staticmethod
     def bert_similarity_parse(generated_output, choices):
-        print('BERT SIMILARITY PARSE')
+        log.info('BERT SIMILARITY PARSE')
         force_choice_func = build_force_choice_func('bert')
         answer_idx, _ = force_choice_func(generated_output, choices)
-        print('ANSWER IDX', answer_idx, type(answer_idx))
+        log.info('ANSWER IDX %s %s', answer_idx, type(answer_idx))
         return generated_output, answer_idx, 'bert_similarity'
 
     @staticmethod

From a657a7fc5ac3e2fdc05d149afba485a47c4c655a Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Fri, 1 Mar 2024 15:30:09 -0500
Subject: [PATCH 02/11] In progress

---
 .../algorithms/llama_2_single_kdma_adm.py     | 54 +++++++++++++++++--
 align_system/evaluation/adm_evaluator.py      |  2 +-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index c037da17..61195411 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -3,7 +3,7 @@
 import random
 import os
 import pathlib
-from align_system.algorithms.abstracts import AlignedDecisionMaker
+import random
 
 from jinja2.exceptions import TemplateError
 
@@ -13,8 +13,7 @@
 import numpy as np
 
 from align_system.utils import logging
-
-
+from align_system.algorithms.abstracts import AlignedDecisionMaker
 from align_system.similarity_measures import build_force_choice_func
 
 
@@ -428,7 +427,7 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam
             if not good_parse:
                 reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(high_response, shuffled_choices)
 
-            log.info('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices)
+            log.explain('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices)
             assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}'
 
             responses.append({
@@ -760,11 +759,58 @@ def run_aligned_decision_maker_with_voting(
 
         return reasoning, answer_idx, responses, inference_pairs
 
+    def format_single_incontext_prompt(self, sample):
+        prompt = sample['scenario']
+        if sample['state'] is not None:
+            prompt += f'\n{sample["state"]}'
+
+        choices = sample['choices']
+
+        labels = kwargs.get('labels', {})
+
+        alignment_target = None
+        if target_kdma_values is not None:
+            target_kdma = next(iter(next(iter(filter(lambda x: len(x) > 0, labels))))) # get the frist key of the first label that is not empty
+
+            for label in labels:
+                assert len(label) == 0 or (target_kdma in label and len(label) == 1), f'All labels must have the same KDMA: labels={labels}'
+
+            alignment_target = {
+                target_kdma: target_kdma_values[target_kdma]
+            }
+
+
+        #TODO: add prompt completetion here for choices as well.
+
+
     def __call__(self, sample, target_kdma_values, **kwargs):
+        """ Build the prompt and send to the LLM to ask for a single KDMA
+
+
+        """
         prompt = sample['scenario']
         if sample['state'] is not None:
             prompt += f'\n{sample["state"]}'
 
+        if 'incontext' in kwargs:
+            possible_samples = []
+
+            for sam in kwargs['dataset']:
+                if sam['probe_id'] != sample['probe_id']:
+                    possible_samples.append(sam)
+
+                if len(possible_samples) < kwargs['incontext']['number']:
+                    raise(f'Not enough possible incontext samples to learn from here.
+                          Only {len(possible_samples)} samples while asking for
+                          {kwargs['incontext']['number']} in context samples')
+
+                if kwargs['incontext']['method'] == 'random':
+                    chosen_sample = random.sample(possible_samples, kwargs['incontext']['number'])
+                else:
+                    raise(f'"{kwargs['incontext']['method']}" is not a valid incontext method.  Please use "random", ')
+
+
+
         if 'retriever' in kwargs:
             # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)
             retriever_prompt = "{}  {}".format(prompt, sample['probe'])
diff --git a/align_system/evaluation/adm_evaluator.py b/align_system/evaluation/adm_evaluator.py
index ff7eda9f..4a742e4f 100644
--- a/align_system/evaluation/adm_evaluator.py
+++ b/align_system/evaluation/adm_evaluator.py
@@ -11,7 +11,7 @@ def generate_outputs(dataset, adm, target_kdma_values, **kwargs):
             })
             continue
         
-        outputs.append(adm(input_, target_kdma_values, labels=label, **kwargs))
+        outputs.append(adm(input_, target_kdma_values, labels=label, dataset=dataset, **kwargs))
     
     return outputs
 

From b1b501c6c933a6c914bf3dcc64c1e0a623f2dc9d Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Fri, 1 Mar 2024 19:03:37 -0500
Subject: [PATCH 03/11] First working cut of incontext learning

---
 .../algorithms/llama_2_single_kdma_adm.py     | 52 +++++++++++--------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 61195411..b30bb20a 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -114,6 +114,7 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec
         self.temperature = temperature
         self.do_sample = do_sample
         self.chat_template = kwargs.get('chat_template', None)
+        self.dataset = []
 
         assert precision in ['full', 'half'], "precision must be either 'full' or 'half'."
         self.precision = torch.float32 if precision == 'full' else torch.float16
@@ -407,6 +408,7 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam
                 shuffled_choices,
                 system_message=system_message)
 
+
             if not logged_aligned_dialog:
                 log.debug("[bold]*ALIGNED DIALOG*[/bold]",
                           extra={"markup": True})
@@ -759,25 +761,17 @@ def run_aligned_decision_maker_with_voting(
 
         return reasoning, answer_idx, responses, inference_pairs
 
-    def format_single_incontext_prompt(self, sample):
+    def format_single_incontext_prompt(self, sample, labels):
         prompt = sample['scenario']
         if sample['state'] is not None:
             prompt += f'\n{sample["state"]}'
 
-        choices = sample['choices']
+        for choice, label in zip(sample['choices'],labels):
+            level = 'high' if list(label.values())[0] > 5 else 'low'
+            attribute = list(label.keys())[0].replace('_', ' ')
+            prompt += f'  If you had a {level} {attribute}, you would select {choice}.'
 
-        labels = kwargs.get('labels', {})
-
-        alignment_target = None
-        if target_kdma_values is not None:
-            target_kdma = next(iter(next(iter(filter(lambda x: len(x) > 0, labels))))) # get the frist key of the first label that is not empty
-
-            for label in labels:
-                assert len(label) == 0 or (target_kdma in label and len(label) == 1), f'All labels must have the same KDMA: labels={labels}'
-
-            alignment_target = {
-                target_kdma: target_kdma_values[target_kdma]
-            }
+        return prompt
 
 
         #TODO: add prompt completetion here for choices as well.
@@ -795,21 +789,33 @@ def __call__(self, sample, target_kdma_values, **kwargs):
         if 'incontext' in kwargs:
             possible_samples = []
 
+            #sam has both info in first element and labels in second element
             for sam in kwargs['dataset']:
-                if sam['probe_id'] != sample['probe_id']:
+                if sam[0]['probe_id'] != sample['probe_id']:
+
                     possible_samples.append(sam)
 
-                if len(possible_samples) < kwargs['incontext']['number']:
-                    raise(f'Not enough possible incontext samples to learn from here.
-                          Only {len(possible_samples)} samples while asking for
-                          {kwargs['incontext']['number']} in context samples')
+            if len(possible_samples) < kwargs['incontext']['number']:
+                raise RuntimeError(f'Not enough possible incontext samples to learn from here.'
+                        f'Only {len(possible_samples)} samples while asking for'
+                        f'{kwargs["incontext"]["number"]} in context samples')
 
-                if kwargs['incontext']['method'] == 'random':
-                    chosen_sample = random.sample(possible_samples, kwargs['incontext']['number'])
-                else:
-                    raise(f'"{kwargs['incontext']['method']}" is not a valid incontext method.  Please use "random", ')
+            if kwargs['incontext']['method'] == 'random':
+                chosen_sample = random.sample(possible_samples, kwargs['incontext']['number'])
+            else:
+                raise(f'"{kwargs["incontext"]["method"]}" is not a valid incontext method.  Please use "random", ')
+
+            incontext_prompt_start = '  Here are some examples of similar problems with their attributes. '
+
+
+            extra_prompts = [incontext_prompt_start]
+            for cs, cl in chosen_sample:
+                extra_prompts.append(self.format_single_incontext_prompt(cs, cl))
 
+            extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
 
+            extra_prompts = ''.join(extra_prompts)
+            prompt = extra_prompts + prompt
 
         if 'retriever' in kwargs:
             # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)

From bc9955ea01fb21d17228c95b8be2a5024ad3287a Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Wed, 6 Mar 2024 13:32:35 -0500
Subject: [PATCH 04/11] Updating incontext prompt to number each example

---
 align_system/algorithms/llama_2_single_kdma_adm.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index b30bb20a..ca7557d8 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -809,8 +809,10 @@ def __call__(self, sample, target_kdma_values, **kwargs):
 
 
             extra_prompts = [incontext_prompt_start]
+            ci =  1
             for cs, cl in chosen_sample:
-                extra_prompts.append(self.format_single_incontext_prompt(cs, cl))
+                extra_prompts.append(f'  Example {ci}' + self.format_single_incontext_prompt(cs, cl))
+                ci += 1
 
             extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
 

From e8ffc83ed1e06423038c52e2dd0f5dc87dfd8921 Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 29 Apr 2024 17:24:44 -0400
Subject: [PATCH 05/11] Updated to use an external dataset for incontext. 
 Right now using the old style dataset

---
 .../single_kdma_adm_config_incontext.yml      | 21 ++++++++
 .../algorithms/llama_2_single_kdma_adm.py     | 48 ++++++++++---------
 2 files changed, 47 insertions(+), 22 deletions(-)
 create mode 100644 adm_configs/single_kdma_adm_config_incontext.yml

diff --git a/adm_configs/single_kdma_adm_config_incontext.yml b/adm_configs/single_kdma_adm_config_incontext.yml
new file mode 100644
index 00000000..88e40153
--- /dev/null
+++ b/adm_configs/single_kdma_adm_config_incontext.yml
@@ -0,0 +1,21 @@
+adm:
+  name: 'SingleKDMAADM'
+  init_kwargs:
+    hf_model: meta-llama/Llama-2-7b-chat-hf
+    precision: half
+    temperature: 0.7
+
+  inference_kwargs:
+    baseline: true
+    n_negative_samples: 0
+    n_positive_samples: 1
+    shuffle: true
+    incontext: 
+      number: 5
+      method: random
+    dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+
+alignment_target_override:
+  id: ADEPT-metrics_eval-alignment-target-train-HIGH
+  kdma_values:
+    - {kdma: MoralDesert, value: 1}
diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index ca7557d8..403978c9 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -789,11 +789,15 @@ def __call__(self, sample, target_kdma_values, **kwargs):
         if 'incontext' in kwargs:
             possible_samples = []
 
-            #sam has both info in first element and labels in second element
-            for sam in kwargs['dataset']:
-                if sam[0]['probe_id'] != sample['probe_id']:
+            # Read dataset
+            with open(kwargs['dataset']) as f:
+                dataset = json.load(f)
 
-                    possible_samples.append(sam)
+            #sam has both info in first element and labels in second element
+            for sam in dataset:
+                # if sam[0]['probe_id'] != sample['probe_id']:
+                # TODO: add a way to prevent having the sample as a knn if loading itself
+                possible_samples.append(sam)
 
             if len(possible_samples) < kwargs['incontext']['number']:
                 raise RuntimeError(f'Not enough possible incontext samples to learn from here.'
@@ -819,30 +823,30 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             extra_prompts = ''.join(extra_prompts)
             prompt = extra_prompts + prompt
 
-        if 'retriever' in kwargs:
-            # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)
-            retriever_prompt = "{}  {}".format(prompt, sample['probe'])
+        # if 'retriever' in kwargs:
+        #     # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)
+        #     retriever_prompt = "{}  {}".format(prompt, sample['probe'])
 
-            retriever = kwargs['retriever']
-            retrieved_nodes = retriever.retrieve(retriever_prompt)
+        #     retriever = kwargs['retriever']
+        #     retrieved_nodes = retriever.retrieve(retriever_prompt)
 
-            if 'summarizer' in kwargs:
-                summarizer = kwargs['summarizer']
-                summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes)
+        #     if 'summarizer' in kwargs:
+        #         summarizer = kwargs['summarizer']
+        #         summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes)
 
-                log.explain("[bold] ** Retrieval Summary ** [/bold]",
-                            extra={"markup": True})
-                log.explain(summary)
+        #         log.explain("[bold] ** Retrieval Summary ** [/bold]",
+        #                     extra={"markup": True})
+        #         log.explain(summary)
 
-                prompt += "\n#############\n{}\n#############".format(summary)
+        #         prompt += "\n#############\n{}\n#############".format(summary)
 
-            else:
-                prompt += "\n#############\n{}\n#############".format(
-                    "\n#############\n".join((n.text for n in retrieved_nodes)))
+        #     else:
+        #         prompt += "\n#############\n{}\n#############".format(
+        #             "\n#############\n".join((n.text for n in retrieved_nodes)))
 
-            prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}'
-        else:
-            prompt += f'\n{sample["probe"]}'
+        #     prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}'
+        # else:
+        prompt += f'\n{sample["probe"]}'
 
         choices = sample['choices']
 

From 2b717a64981ff95b800b43ef45add9c2dd762ea4 Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 29 Apr 2024 18:21:21 -0400
Subject: [PATCH 06/11] WIP: getting incontext examples with tokens

---
 .../algorithms/llama_2_single_kdma_adm.py     | 29 +++++++++++++++----
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 403978c9..76251263 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -216,6 +216,7 @@ def chat_prompt_tokens(self, dialogs, return_tensor=True):
     def build_multiple_choice_dialog(self,
                                      question,
                                      options,
+                                     incontext=None, 
                                      system_message=None,
                                      json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT):
         medical_triage_system_message = (
@@ -374,7 +375,7 @@ def respond_to_dialogs_batched(self, dialogs, prefixes=None):
 
         return generated_outputs
 
-    def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3):
+    def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3):
         inference_pairs = []
         if not baseline:
             unsupported_kdmas = {kdma_remapping.get(k, k)
@@ -406,7 +407,8 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam
             dialog = self.build_multiple_choice_dialog(
                 question,
                 shuffled_choices,
-                system_message=system_message)
+                system_message=system_message,
+                incontext=incontext)
 
 
             if not logged_aligned_dialog:
@@ -766,6 +768,18 @@ def format_single_incontext_prompt(self, sample, labels):
         if sample['state'] is not None:
             prompt += f'\n{sample["state"]}'
 
+
+
+        [f'({i}) {option}' for i, option in enumerate(options)]
+        {
+            "role": "user",
+            "content": system_message
+        },
+        {
+            "role": "assistant",
+            "content": invalid_json
+        }
+
         for choice, label in zip(sample['choices'],labels):
             level = 'high' if list(label.values())[0] > 5 else 'low'
             attribute = list(label.keys())[0].replace('_', ' ')
@@ -788,6 +802,7 @@ def __call__(self, sample, target_kdma_values, **kwargs):
 
         if 'incontext' in kwargs:
             possible_samples = []
+            incontext_prompts = []
 
             # Read dataset
             with open(kwargs['dataset']) as f:
@@ -809,10 +824,11 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             else:
                 raise(f'"{kwargs["incontext"]["method"]}" is not a valid incontext method.  Please use "random", ')
 
-            incontext_prompt_start = '  Here are some examples of similar problems with their attributes. '
+            # incontext_prompt_start = '  Here are some examples of similar problems with their attributes. '
 
 
-            extra_prompts = [incontext_prompt_start]
+            # extra_prompts = [incontext_prompt_start]
+            
             ci =  1
             for cs, cl in chosen_sample:
                 extra_prompts.append(f'  Example {ci}' + self.format_single_incontext_prompt(cs, cl))
@@ -821,7 +837,7 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
 
             extra_prompts = ''.join(extra_prompts)
-            prompt = extra_prompts + prompt
+            # prompt = extra_prompts + prompt
 
         # if 'retriever' in kwargs:
         #     # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)
@@ -867,6 +883,7 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             prompt,
             choices,
             alignment_target,
+            incontext=None,
             n_positive_samples=kwargs.get('n_positive_samples', 5),
             n_negative_samples=kwargs.get('n_negative_samples', 5),
             baseline=kwargs.get('baseline', False),
@@ -1100,7 +1117,7 @@ def populate_tagging_parameters(self, scenario_state, tagging_action, alignment_
 
             parsed_tagging_output = self.attempt_generic_parse(  # noqa
                 raw_tagging_response, ['Reasoning', 'Answer', 'Tag'])  # noqa
-
+            
             if parsed_tagging_output is not None:
                 if len(untagged_characters) == 1:
                     log.debug("** Force selecting only available character")

From abcaa76e070f050d0338431d262d06adc1dc47c6 Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 6 May 2024 18:45:45 -0400
Subject: [PATCH 07/11] Updated incontext examples to be like the assistant and
 also now about to use bert similarity to pick items

---
 .gitignore                                    |   1 +
 .vscode/launch.json                           |  52 ++++
 ...ig.yml => single_kdma_adm_config_high.yml} |   0
 ...single_kdma_adm_config_high_incontext.yml} |   2 +-
 adm_configs/single_kdma_adm_config_low.yml    |  17 ++
 .../single_kdma_adm_config_low_incontext.yml  |  21 ++
 .../algorithms/llama_2_single_kdma_adm.py     | 287 ++++++++++++++----
 7 files changed, 326 insertions(+), 54 deletions(-)
 create mode 100644 .vscode/launch.json
 rename adm_configs/{single_kdma_adm_config.yml => single_kdma_adm_config_high.yml} (100%)
 rename adm_configs/{single_kdma_adm_config_incontext.yml => single_kdma_adm_config_high_incontext.yml} (84%)
 create mode 100644 adm_configs/single_kdma_adm_config_low.yml
 create mode 100644 adm_configs/single_kdma_adm_config_low_incontext.yml

diff --git a/.gitignore b/.gitignore
index 70a21c83..56492e38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 run.bash
+results/*
 
 venv/
 __pycache__/
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..9170e854
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,52 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "High Incontext Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_high_incontext.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/high_incontext/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/high_incontext/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/high_incontext/output-scores.json"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "1"
+                    }
+        },
+        {
+            "name": "Low Incontext Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_low_incontext.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/low_incontext/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/low_incontext/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/low_incontext/output-scores.json"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "0"
+                    }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/adm_configs/single_kdma_adm_config.yml b/adm_configs/single_kdma_adm_config_high.yml
similarity index 100%
rename from adm_configs/single_kdma_adm_config.yml
rename to adm_configs/single_kdma_adm_config_high.yml
diff --git a/adm_configs/single_kdma_adm_config_incontext.yml b/adm_configs/single_kdma_adm_config_high_incontext.yml
similarity index 84%
rename from adm_configs/single_kdma_adm_config_incontext.yml
rename to adm_configs/single_kdma_adm_config_high_incontext.yml
index 88e40153..d564d779 100644
--- a/adm_configs/single_kdma_adm_config_incontext.yml
+++ b/adm_configs/single_kdma_adm_config_high_incontext.yml
@@ -13,7 +13,7 @@ adm:
     incontext: 
       number: 5
       method: random
-    dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+      dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
 
 alignment_target_override:
   id: ADEPT-metrics_eval-alignment-target-train-HIGH
diff --git a/adm_configs/single_kdma_adm_config_low.yml b/adm_configs/single_kdma_adm_config_low.yml
new file mode 100644
index 00000000..55fd28e2
--- /dev/null
+++ b/adm_configs/single_kdma_adm_config_low.yml
@@ -0,0 +1,17 @@
+adm:
+  name: 'SingleKDMAADM'
+  init_kwargs:
+    hf_model: meta-llama/Llama-2-7b-chat-hf
+    precision: half
+    temperature: 0.7
+
+  inference_kwargs:
+    baseline: true
+    n_negative_samples: 0
+    n_positive_samples: 1
+    shuffle: true
+
+alignment_target_override:
+  id: ADEPT-metrics_eval-alignment-target-train-LOW
+  kdma_values:
+    - {kdma: MoralDesert, value: 0}
diff --git a/adm_configs/single_kdma_adm_config_low_incontext.yml b/adm_configs/single_kdma_adm_config_low_incontext.yml
new file mode 100644
index 00000000..90a65b35
--- /dev/null
+++ b/adm_configs/single_kdma_adm_config_low_incontext.yml
@@ -0,0 +1,21 @@
+adm:
+  name: 'SingleKDMAADM'
+  init_kwargs:
+    hf_model: meta-llama/Llama-2-7b-chat-hf
+    precision: half
+    temperature: 0.7
+
+  inference_kwargs:
+    baseline: true
+    n_negative_samples: 0
+    n_positive_samples: 1
+    shuffle: true
+    incontext: 
+      number: 5
+      method: random
+      dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+
+alignment_target_override:
+  id: ADEPT-metrics_eval-alignment-target-train-LOW
+  kdma_values:
+    - {kdma: MoralDesert, value: 0}
diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 76251263..6717dd86 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -4,6 +4,7 @@
 import os
 import pathlib
 import random
+from functools import reduce
 
 from jinja2.exceptions import TemplateError
 
@@ -214,41 +215,76 @@ def chat_prompt_tokens(self, dialogs, return_tensor=True):
 
 
     def build_multiple_choice_dialog(self,
-                                     question,
-                                     options,
-                                     incontext=None, 
-                                     system_message=None,
-                                     json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT):
+                                    question,
+                                    options,
+                                    incontext=None,
+                                    system_message=None,
+                                    json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT):
+        """
+        Constructs a structured dialog for a multiple-choice question scenario, typically used in systems
+        that require a step-by-step reasoning response in a controlled format such as JSON.
+
+        Parameters:
+            question (str): The question to be answered, forming the basis of the multiple-choice scenario.
+            options (list of str): The multiple-choice options provided as answers to the question.
+            incontext (list of list of dict, optional): In-context example dialogs; they are flattened and
+                inserted between the system message and the user message. Defaults to None.
+            system_message (str, optional): A predefined system message that guides the assistant on how to
+                respond. If None, a default message tailored for medical triage scenarios is used.
+            json_format (str): The JSON format that the user response should adhere to, providing structural
+                guidance on how the response should be formatted.
+
+        Returns:
+            list of dict: A list where each dictionary represents a part of the dialog, with 'role' specifying
+                whether the part is from the 'system' or the 'user', and 'content' containing the text for that role.
+
+        Notes:
+            The function defaults to using a specialized system message for medical triage unless a specific
+            message is provided. This default message instructs the assistant on how to approach and format
+            the response to the question in a methodical and informative manner.
+        """
+        # Define a default system message for medical triage if none is provided
         medical_triage_system_message = (
             "You are an assistant specialized in answering multiple-choice questions related to "
             "medical triage. When a question is posed, carefully analyze the symptoms or conditions "
             "described. Respond by providing a detailed reasoning using a step-by-step process or "
             "process of elimination. Conclude with the final answer, represented by the corresponding "
             "index number. Your response should be grounded in established medical knowledge and aim "
-            "to be informative. Please format your response as a JSON object in the following structure:\\n\\n"
+            "to be informative. Please format your response as a JSON object in the following structure:\n\n"
             f"{json_format}"
             "Ensure that you adhere to proper JSON syntax, and carefully note the index that corresponds to each answer."
         )
         if system_message is None:
             system_message = medical_triage_system_message
 
+        # Format the multiple choice options for display
         formatted_options = [f'({i}) {option}' for i, option in enumerate(options)]
 
-        content = f'{question} {formatted_options}'
+        content = f'{question} {" ".join(formatted_options)}'
+        if incontext:
+            dialog =  list(reduce(lambda x, y: x + y, incontext, []))
+        else:
+            dialog = []
 
-        dialog = [
+        # Construct the dialog with system and user parts
+        
+        s_message = [
             {
                 "role": "system",
                 "content": system_message
-            },
+            }
+        ]  
+        u_message = [
             {
                 "role": "user",
                 "content": content
             }
         ]
+        dialog = s_message + dialog + u_message
 
         return dialog
 
+
     def log_dialog(self, dialog):
         for e in dialog:
             if e.get('role') == 'system':
@@ -376,19 +412,53 @@ def respond_to_dialogs_batched(self, dialogs, prefixes=None):
         return generated_outputs
 
     def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3):
+        """ Executes a decision-making process by simulating a dialog based on positive and negative alignments with specified Key Decision-Making Attributes (KDMAs).
+        It attempts to identify the choice that best aligns with the target attributes, using both positive and negative samples to provide robustness against biases.
+
+        Parameters:
+            question (str): The primary question posed to the decision-making system.
+            choices (list of str): A list of choices from which the system must select the most appropriate based on KDMAs.
+            target_kdmas (dict): Key-value pairs indicating the target KDMAs and their desired levels. Values indicate desired thresholds for alignment.
+            incontext (list of list of dict, optional): In-context example dialogs forwarded to build_multiple_choice_dialog for the positively aligned samples only.
+            n_positive_samples (int): Number of samples to process assuming positive alignment with the target KDMAs.
+            n_negative_sampels (int): Number of samples to process assuming negative or inverse alignment with the target KDMAs (the keyword is spelled this way in the signature).
+            shuffle (bool): If True, shuffle the choices to potentially reduce positional bias in the decision-making process.
+            baseline (bool): If True, use a baseline decision-making model that does not consider specific KDMAs.
+            n_retries (int): The number of retry attempts to parse a successful response from the decision-making process.
+
+        Returns:
+            tuple:
+                responses (list): A list of dictionaries where each dictionary contains the response from the decision-making system, the reasoning behind it, and the index of the chosen answer.
+                inference_pairs (list): A list of dictionaries capturing detailed information about each inference attempt for analysis and debugging.
+
+        Raises:
+            RuntimeError: If `baseline` is False and any specified KDMAs in `target_kdmas` are not supported by the system.
+
+        Notes:
+            This function leverages logging to trace both aligned and misaligned dialogs; only the first of each type is logged for brevity.
+        """
+
         inference_pairs = []
+
+
+        # Check if baseline is not used and handle unsupported KDMAs
         if not baseline:
             unsupported_kdmas = {kdma_remapping.get(k, k)
                                  for k in target_kdmas.keys()} - kdmas
             if len(unsupported_kdmas) > 0:
                 raise RuntimeError(f"KDMA(s) {unsupported_kdmas} not supported.")
 
+
+        # Response prefix passed to respond_to_dialog for every sample
         prefix = '{"Reasoning": "Because'
 
         responses = []
 
+        # Flags to ensure we log certain types of dialog once
         logged_aligned_dialog = False
         logged_inverse_misaligned_dialog = False
+
+        # Generate responses for positive samples
         for _ in range(n_positive_samples):
             if baseline:
                 system_message = load_system_message()
@@ -399,24 +469,27 @@ def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None
                                     for kdma, value in target_kdmas.items()}
                 system_message = load_system_message(system_message_keys)
 
-            indecies = list(range(len(choices)))
+            # Shuffle choices if required
+            indices = list(range(len(choices)))
             if shuffle:
-                random.shuffle(indecies)
-            shuffled_choices = [choices[i] for i in indecies]
+                random.shuffle(indices)
+            shuffled_choices = [choices[i] for i in indices]
 
+            # Build dialog with the system message and shuffled choices
             dialog = self.build_multiple_choice_dialog(
                 question,
                 shuffled_choices,
                 system_message=system_message,
                 incontext=incontext)
 
-
+            # Log aligned dialog once for clarity
             if not logged_aligned_dialog:
                 log.debug("[bold]*ALIGNED DIALOG*[/bold]",
                           extra={"markup": True})
                 self.log_dialog(dialog)
                 logged_aligned_dialog = True
 
+            # Attempt to parse a valid response multiple times
             good_parse = False
             for i in range(n_retries):
                 high_response, inference_pair = self.respond_to_dialog(dialog, prefix=prefix)
@@ -428,42 +501,48 @@ def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None
                 except RuntimeError as e:
                     pass
 
+            # Fallback parsing strategy if normal parsing fails
             if not good_parse:
                 reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(high_response, shuffled_choices)
 
+            # Ensure an answer was parsed successfully
             log.explain('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices)
             assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}'
 
+            # Store response details
             responses.append({
                 'response': high_response,
                 'reasoning': reasoning,
                 'answer_idx': answer_idx,
-                'shuffle_indecies': indecies,
+                'shuffle_indices': indices,
                 'alignment': system_message_keys,
                 'aligned': True,
                 'parse_method': parse_method,
             })
-
+        # Repeat process for negative samples with inverse KDMA logic
         for _ in range(n_negative_sampels):
             system_message_keys = {kdma: 'high' if not value > 5 else 'low'
                                     for kdma, value in target_kdmas.items()}
 
-            indecies = list(range(len(choices)))
+            indices = list(range(len(choices)))
             if shuffle:
-                random.shuffle(indecies)
-            shuffled_choices = [choices[i] for i in indecies]
+                random.shuffle(indices)
+            shuffled_choices = [choices[i] for i in indices]
 
+            # Build dialog with inverse logic
             inverse_misaligned_dialog = self.build_multiple_choice_dialog(
                 question,
                 shuffled_choices,
                 system_message=load_system_message(system_message_keys))
 
+            # Log the first occurrence of an inverse misaligned dialog
             if not logged_inverse_misaligned_dialog:
                 log.debug("[bold]*INVERSE MISALIGNED DIALOG*[/bold]",
                             extra={"markup": True})
                 self.log_dialog(inverse_misaligned_dialog)
                 logged_inverse_misaligned_dialog = True
 
+            # Attempt response parsing with retries
             good_parse = False
             for i in range(n_retries):
                 low_response, inference_pair = self.respond_to_dialog(inverse_misaligned_dialog, prefix=prefix)
@@ -475,16 +554,18 @@ def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None
                 except RuntimeError as e:
                     pass
 
+            # Fallback parsing strategy if normal parsing fails
             if not good_parse:
                 reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(low_response, shuffled_choices)
 
             assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}'
 
+            # Store response details
             responses.append({
                 'response': low_response,
                 'reasoning': reasoning,
                 'answer_idx': answer_idx,
-                'shuffle_indecies': indecies,
+                'shuffle_indices': indices,
                 'alignment': system_message_keys,
                 'aligned': False,
                 'parse_method': parse_method,
@@ -495,6 +576,23 @@ def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None
 
     @staticmethod
     def calculate_votes(responses, choices):
+        """
+        Calculates voting scores for each choice based on a list of responses. Responses that align with the desired outcome increase the score of the selected choice. Misaligned responses distribute a penalty among other choices.
+
+        Parameters:
+            responses (list of dicts): Each dictionary contains information about a single response, including:
+                - 'answer_idx' (int or str): The index of the chosen answer.
+                - 'shuffle_indices' (list of int, optional): If present, it represents the original indices of the choices after shuffling.
+                - 'aligned' (bool): Indicates whether the response is aligned (True) or misaligned (False) with the desired outcome.
+            choices (list of str): A list of choices available for voting.
+
+        Returns:
+            list of float: A list of normalized vote scores for each choice, where higher scores represent greater alignment with the desired outcome.
+
+        Notes:
+            - The function handles cases where 'answer_idx' may not be an integer or could be out of the valid range of choices.
+            - Scores are adjusted by the minimum score to ensure all are non-negative and are then normalized to sum to 1.
+        """
         choice_votes = [0] * len(choices)
         for response in responses:
             answer_idx = response['answer_idx']
@@ -509,8 +607,8 @@ def calculate_votes(responses, choices):
             if answer_idx >= len(choices):
                 continue
 
-            if 'shuffle_indecies' in response:
-                answer_idx = response['shuffle_indecies'][int(answer_idx)]
+            if 'shuffle_indices' in response:
+                answer_idx = response['shuffle_indices'][int(answer_idx)]
 
             aligned = response['aligned']
 
@@ -717,11 +815,50 @@ def correct_json(self, invalid_json, verbose=True):
             return None
 
     def run_aligned_decision_maker_with_voting(
-            self, prompt, choices, alignment_target, n_positive_samples=5, n_negative_samples=5, baseline=False, shuffle=False):
+            self, 
+            prompt, 
+            choices, 
+            alignment_target, 
+            incontext= None,
+            n_positive_samples=5, 
+            n_negative_samples=5, 
+            baseline=False, 
+            shuffle=False):
+        """ Executes a decision-making process with voting based on alignment targets and user-provided choices. 
+        This method incorporates a mechanism for evaluating the alignment of choices with a specified target 
+        using a set of positive and negative samples.
+
+        Parameters:
+            prompt (str): The input prompt to which the decision-making model responds.
+            choices (list): A list of possible choices for the decision-maker to evaluate.
+            alignment_target (dict): Mapping of target KDMA names to their desired values; forwarded to aligned_decision_maker as `target_kdmas`.
+            incontext (list[list[dict]], optional): In-context example dialogs forwarded to aligned_decision_maker. Defaults to None.
+            n_positive_samples (int): Number of positive samples to use for aligning the choices with the target. Defaults to 5.
+            n_negative_samples (int): Number of negative samples to use for the alignment evaluation. Defaults to 5.
+            baseline (bool): Flag to determine whether to use a baseline model for comparison. Defaults to False.
+            shuffle (bool): Option to shuffle the choices before processing. This can help in reducing bias. Defaults to False.
+
+        Returns:
+            tuple: A tuple containing:
+                - reasoning (str or None): The reasoning behind the selected choice, if available.
+                - answer_idx (int): The index of the choice selected as most aligned.
+                - responses (list): Detailed responses from the model for each choice.
+                - inference_pairs (list): Raw data pairs used in the inference process.
+
+        Raises:
+            Exception: Captures and logs any exception that occurs during the vote calculation, defaulting choice scores to None if an error occurs.
+
+        Notes:
+            This method leverages internal logging to trace the detailed responses and the computation of choice scores. 
+            It is essential to ensure proper initialization of the logging and handling mechanisms to capture and utilize
+            the detailed debug outputs effectively.
+        
+        """
         responses, inference_pairs = self.aligned_decision_maker(
             prompt,
             choices,
             alignment_target,
+            incontext=incontext,
             baseline=baseline,
             n_positive_samples=n_positive_samples,
             n_negative_sampels=n_negative_samples,
@@ -755,40 +892,69 @@ def run_aligned_decision_maker_with_voting(
 
         for r in responses:
             assert r['answer_idx'] is not None
-            assert int(r['answer_idx']) < len(r['shuffle_indecies'])
+            assert int(r['answer_idx']) < len(r['shuffle_indices'])
 
-            if r['shuffle_indecies'][int(r['answer_idx'])] == answer_idx:
+            if r['shuffle_indices'][int(r['answer_idx'])] == answer_idx:
                 reasoning = r['reasoning']
                 break
 
         return reasoning, answer_idx, responses, inference_pairs
 
-    def format_single_incontext_prompt(self, sample, labels):
-        prompt = sample['scenario']
+
+    def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
+        """
+        Formats an in-context prompt for use in a dialogue system where an assistant provides reasoned answers based on provided scores and target values.
+
+        Parameters:
+            sample (dict): A dictionary containing the scenario description and choices.
+                - 'scenario' (str): The main question or scenario description.
+                - 'state' (str, optional): Additional state information to append to the scenario.
+                - 'choices' (list of str): Possible choices for the scenario.
+            labels (list of dicts): A list where each dictionary contains scores associated with each choice.
+            target_kdma_values (dict): A dictionary mapping a target attribute to its desired value.
+
+        Returns:
+            list of dicts: A list containing structured data for the user and assistant roles, formatted for a dialogue interaction.
+                - Each dictionary contains 'role' (str) indicating if it's 'user' or 'assistant' and 'content' (str) which is the text associated with that role.
+
+        Notes:
+            This function assumes there is exactly one key in `target_kdma_values`.
+            The assistant's answer includes reasoning why a particular choice is preferred based on the minimal distance to the target value in `labels`.
+        """
+        # Combine scenario and additional state information if available
+        question = sample['scenario']
         if sample['state'] is not None:
-            prompt += f'\n{sample["state"]}'
+            question += f'\n{sample["state"]}'
 
+        # Format choices as enumerated options for display
+        formatted_options = [f'({i}) {option}' for i, option in enumerate(sample['choices'])]
+        content = f"{question} {' '.join(formatted_options)}"
 
+        # Calculate the distance of each choice's score to the target value
+        target = next(iter(target_kdma_values))
+        dist_to_target = [abs(score[target] - target_kdma_values[target]) for score in labels]
+        correct_answer_idx = np.argmin(dist_to_target)
 
-        [f'({i}) {option}' for i, option in enumerate(options)]
-        {
-            "role": "user",
-            "content": system_message
-        },
-        {
-            "role": "assistant",
-            "content": invalid_json
+        # Map the target attribute to its descriptive wording
+        target_wording_mapping = {
+            "moral_deservingness": "morally deserving choice.",
+            "maximization": "maximization choice."
         }
 
-        for choice, label in zip(sample['choices'],labels):
-            level = 'high' if list(label.values())[0] > 5 else 'low'
-            attribute = list(label.keys())[0].replace('_', ' ')
-            prompt += f'  If you had a {level} {attribute}, you would select {choice}.'
-
-        return prompt
+        # Determine the adjective based on the value being higher or lower than a threshold
+        adjective = 'more' if target_kdma_values[target] > 5 else 'less'
+        # Construct the reasoning text using the correct choice and its descriptor
+        answer_text = f'Because {sample["choices"][correct_answer_idx]} is the {adjective} {target_wording_mapping.get(target, "specified attribute")}'
+        answer = f'{{"Reasoning": "{answer_text}", "answer": {correct_answer_idx}}}'
 
+        # Structure the dialog with user and assistant roles
+        prompt = [
+            {"role": "user", "content": content},
+            {"role": "assistant", "content": answer}
+        ]
 
-        #TODO: add prompt completetion here for choices as well.
+        return prompt
+        
 
 
     def __call__(self, sample, target_kdma_values, **kwargs):
@@ -805,13 +971,13 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             incontext_prompts = []
 
             # Read dataset
-            with open(kwargs['dataset']) as f:
+            with open(kwargs['incontext']['dataset']) as f:
                 dataset = json.load(f)
 
             #sam has both info in first element and labels in second element
             for sam in dataset:
                 # if sam[0]['probe_id'] != sample['probe_id']:
-                # TODO: add a way to prevent having the sample as a knn if loading itself
+                # TODO: add a way to prevent (or ensure) having the sample as a knn if loading itself
                 possible_samples.append(sam)
 
             if len(possible_samples) < kwargs['incontext']['number']:
@@ -821,22 +987,37 @@ def __call__(self, sample, target_kdma_values, **kwargs):
 
             if kwargs['incontext']['method'] == 'random':
                 chosen_sample = random.sample(possible_samples, kwargs['incontext']['number'])
+            elif kwargs['incontext']['method'] == 'bert_similarity':
+                # Extract Strings for each situation
+                possible_samples_parse = []
+                for s, _ in possible_samples:
+                    question = s['scenario']
+                    if s['state'] is not None:
+                        question += f'\n{s["state"]}'
+                    possible_samples_parse.append(question)
+
+                # Create similarity scores between incontext dataset and find topk indices
+                from bert_score import score 
+                _, _, F1 = score([prompt]*len(possible_samples_parse), possible_samples_parse, lang='en')
+                _, indices = torch.topk(F1,  kwargs['incontext']['number'])
+
+                # Make list of the top k for creating prompts
+                chosen_sample = []
+                for i in indices:
+                    chosen_sample.append(possible_samples[i])
             else:
-                raise(f'"{kwargs["incontext"]["method"]}" is not a valid incontext method.  Please use "random", ')
-
-            # incontext_prompt_start = '  Here are some examples of similar problems with their attributes. '
+                raise(f'"{kwargs["incontext"]["method"]}" is not a valid incontext method.  Please use "random or bert_similarity", ')
 
 
-            # extra_prompts = [incontext_prompt_start]
-            
+            incontext_prompts = []
             ci =  1
             for cs, cl in chosen_sample:
-                extra_prompts.append(f'  Example {ci}' + self.format_single_incontext_prompt(cs, cl))
+                incontext_prompts.append(self.format_single_incontext_prompt(cs, cl, target_kdma_values))
                 ci += 1
 
-            extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
+            # extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
 
-            extra_prompts = ''.join(extra_prompts)
+            # extra_prompts = ''.join(extra_prompts)
             # prompt = extra_prompts + prompt
 
         # if 'retriever' in kwargs:
@@ -883,7 +1064,7 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             prompt,
             choices,
             alignment_target,
-            incontext=None,
+            incontext=incontext_prompts,
             n_positive_samples=kwargs.get('n_positive_samples', 5),
             n_negative_samples=kwargs.get('n_negative_samples', 5),
             baseline=kwargs.get('baseline', False),

From ce57312eeeedb5a0d89ae4085114172b606c9910 Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 20 May 2024 17:41:39 -0400
Subject: [PATCH 08/11] Added Incontext learning dataset update

Add new in-context examples pulled from a dataset extracted by running on the TA3 system.
---
 .../algorithms/llama_2_single_kdma_adm.py     | 50 ++++++++++++-------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 6717dd86..00f77e64 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -900,16 +900,15 @@ def run_aligned_decision_maker_with_voting(
 
         return reasoning, answer_idx, responses, inference_pairs
 
-
     def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
         """
         Formats an in-context prompt for use in a dialogue system where an assistant provides reasoned answers based on provided scores and target values.
 
         Parameters:
             sample (dict): A dictionary containing the scenario description and choices.
-                - 'scenario' (str): The main question or scenario description.
-                - 'state' (str, optional): Additional state information to append to the scenario.
-                - 'choices' (list of str): Possible choices for the scenario.
+                - 'prompt' (str): The main question or scenario description.
+                - 'choices' (list of dicts): Possible choices for the scenario.
+                    - Each choice is a dictionary with an 'unstructured' key containing the choice text.
             labels (list of dicts): A list where each dictionary contains scores associated with each choice.
             target_kdma_values (dict): A dictionary mapping a target attribute to its desired value.
 
@@ -921,18 +920,33 @@ def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
             This function assumes there is exactly one key in `target_kdma_values`.
             The assistant's answer includes reasoning why a particular choice is preferred based on the minimal distance to the target value in `labels`.
         """
-        # Combine scenario and additional state information if available
-        question = sample['scenario']
-        if sample['state'] is not None:
-            question += f'\n{sample["state"]}'
+        # Mapping of target attributes to their corresponding score keys
+        kdma_name_map = {
+            'moral_deservingness': 'MoralDesert',
+            'maximization': 'maximization',
+        }
+
+        # Extract the main question from the sample
+        question = sample['prompt']
 
         # Format choices as enumerated options for display
-        formatted_options = [f'({i}) {option}' for i, option in enumerate(sample['choices'])]
+        formatted_options = [f'({i}) {option["unstructured"]}' for i, option in enumerate(sample['choices'])]
         content = f"{question} {' '.join(formatted_options)}"
 
-        # Calculate the distance of each choice's score to the target value
+        # Extract the target attribute (assuming there's only one key in target_kdma_values)
         target = next(iter(target_kdma_values))
-        dist_to_target = [abs(score[target] - target_kdma_values[target]) for score in labels]
+
+        # Calculate the distance of each choice's score to the target value
+        dist_to_target = []
+        for score in labels:
+            if kdma_name_map[target] in score:
+                # Multiply by 10 to match the rest of the KDMA's score range
+                dist = abs(score[kdma_name_map[target]] * 10 - target_kdma_values[target])
+            else: 
+                dist = float('inf')  # If the target attribute is not in the scores, assign an infinite distance
+            dist_to_target.append(dist)    
+
+        # Determine the index of the choice with the minimum distance to the target value
         correct_answer_idx = np.argmin(dist_to_target)
 
         # Map the target attribute to its descriptive wording
@@ -943,8 +957,9 @@ def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
 
         # Determine the adjective based on the value being higher or lower than a threshold
         adjective = 'more' if target_kdma_values[target] > 5 else 'less'
+
         # Construct the reasoning text using the correct choice and its descriptor
-        answer_text = f'Because {sample["choices"][correct_answer_idx]} is the {adjective} {target_wording_mapping.get(target, "specified attribute")}'
+        answer_text = f'Because {sample["choices"][correct_answer_idx]["unstructured"]} is the {adjective} {target_wording_mapping.get(target, "specified attribute")}'
         answer = f'{{"Reasoning": "{answer_text}", "answer": {correct_answer_idx}}}'
 
         # Structure the dialog with user and assistant roles
@@ -954,7 +969,6 @@ def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
         ]
 
         return prompt
-        
 
 
     def __call__(self, sample, target_kdma_values, **kwargs):
@@ -990,10 +1004,8 @@ def __call__(self, sample, target_kdma_values, **kwargs):
             elif kwargs['incontext']['method'] == 'bert_similarity':
                 # Extract Strings for each situation
                 possible_samples_parse = []
-                for s, _ in possible_samples:
-                    question = s['scenario']
-                    if s['state'] is not None:
-                        question += f'\n{s["state"]}'
+                for s in possible_samples:
+                    question = s['input']['prompt']
                     possible_samples_parse.append(question)
 
                 # Create similarity scores between incontext dataset and find topk indices
@@ -1011,8 +1023,8 @@ def __call__(self, sample, target_kdma_values, **kwargs):
 
             incontext_prompts = []
             ci =  1
-            for cs, cl in chosen_sample:
-                incontext_prompts.append(self.format_single_incontext_prompt(cs, cl, target_kdma_values))
+            for cs in chosen_sample:
+                incontext_prompts.append(self.format_single_incontext_prompt(cs['input'], cs['label'], target_kdma_values))
                 ci += 1
 
             # extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')

From 1d835cfc2e30abdb8585304a6d56a1f103cd1366 Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 20 May 2024 17:42:23 -0400
Subject: [PATCH 09/11] Adding more about the incontext learning (adding to
 previous commit)

---
 .../algorithms/llama_2_single_kdma_adm.py     | 66 ++++++++++---------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py
index 00f77e64..deb85166 100644
--- a/align_system/algorithms/llama_2_single_kdma_adm.py
+++ b/align_system/algorithms/llama_2_single_kdma_adm.py
@@ -970,67 +970,72 @@ def format_single_incontext_prompt(self, sample, labels, target_kdma_values):
 
         return prompt
 
-
     def __call__(self, sample, target_kdma_values, **kwargs):
-        """ Build the prompt and send to the LLM to ask for a single KDMA
+        """
+        Build the prompt and send it to the LLM to ask for a single KDMA (Key Decision-Making Attribute).
 
+        Parameters:
+            sample (dict): A dictionary containing the scenario, state, probe, and choices.
+                - 'scenario' (str): The main scenario description.
+                - 'state' (str, optional): Additional state information to append to the scenario.
+                - 'probe' (str): The specific question or probe to be answered.
+                - 'choices' (list of str): Possible choices for the scenario.
+            target_kdma_values (dict): A dictionary mapping a target attribute to its desired value.
+            kwargs (dict): Additional keyword arguments for in-context learning, retrievers, labels, etc.
+                - 'incontext' (dict, optional): Configuration for in-context learning.
+                    - 'dataset' (str): Path to the in-context dataset.
+                    - 'number' (int): Number of in-context samples to use.
+                    - 'method' (str): Method to select in-context samples ('random' or 'bert_similarity').
+                - 'labels' (list of dicts, optional): A list where each dictionary contains scores associated with each choice.
+                - 'n_positive_samples' (int, optional): Number of positive samples for decision making.
+                - 'n_negative_samples' (int, optional): Number of negative samples for decision making.
+                - 'baseline' (bool, optional): Whether to use a baseline approach.
+                - 'shuffle' (bool, optional): Whether to shuffle the choices.
 
+        Returns:
+            dict: A dictionary containing the selected choice and additional information.
+                - 'choice' (int): The index of the selected choice.
+                - 'info' (dict): Additional information including reasoning, responses, and raw data.
         """
         prompt = sample['scenario']
         if sample['state'] is not None:
             prompt += f'\n{sample["state"]}'
 
+        incontext_prompts = []
+
         if 'incontext' in kwargs:
             possible_samples = []
-            incontext_prompts = []
 
             # Read dataset
             with open(kwargs['incontext']['dataset']) as f:
                 dataset = json.load(f)
 
-            #sam has both info in first element and labels in second element
+            # Populate possible samples from the dataset
             for sam in dataset:
-                # if sam[0]['probe_id'] != sample['probe_id']:
-                # TODO: add a way to prevent (or ensure) having the sample as a knn if loading itself
                 possible_samples.append(sam)
 
             if len(possible_samples) < kwargs['incontext']['number']:
-                raise RuntimeError(f'Not enough possible incontext samples to learn from here.'
-                        f'Only {len(possible_samples)} samples while asking for'
-                        f'{kwargs["incontext"]["number"]} in context samples')
+                raise RuntimeError(f'Not enough possible in-context samples to learn from. Only {len(possible_samples)} samples available while asking for {kwargs["incontext"]["number"]} in-context samples.')
 
             if kwargs['incontext']['method'] == 'random':
                 chosen_sample = random.sample(possible_samples, kwargs['incontext']['number'])
             elif kwargs['incontext']['method'] == 'bert_similarity':
-                # Extract Strings for each situation
-                possible_samples_parse = []
-                for s in possible_samples:
-                    question = s['input']['prompt']
-                    possible_samples_parse.append(question)
+                # Extract strings for each situation
+                possible_samples_parse = [s['input']['prompt'] for s in possible_samples]
 
-                # Create similarity scores between incontext dataset and find topk indices
+                # Create similarity scores between the in-context dataset and find top-k indices
                 from bert_score import score 
                 _, _, F1 = score([prompt]*len(possible_samples_parse), possible_samples_parse, lang='en')
-                _, indices = torch.topk(F1,  kwargs['incontext']['number'])
+                _, indices = torch.topk(F1, kwargs['incontext']['number'])
 
                 # Make list of the top k for creating prompts
-                chosen_sample = []
-                for i in indices:
-                    chosen_sample.append(possible_samples[i])
+                chosen_sample = [possible_samples[i] for i in indices]
             else:
-                raise(f'"{kwargs["incontext"]["method"]}" is not a valid incontext method.  Please use "random or bert_similarity", ')
+                raise ValueError(f'"{kwargs["incontext"]["method"]}" is not a valid in-context method. Please use "random" or "bert_similarity".')
 
-
-            incontext_prompts = []
-            ci =  1
+            # Create in-context prompts
             for cs in chosen_sample:
                 incontext_prompts.append(self.format_single_incontext_prompt(cs['input'], cs['label'], target_kdma_values))
-                ci += 1
-
-            # extra_prompts.append('  Given these similar examples, please answer the question for the following scenario. ')
-
-            # extra_prompts = ''.join(extra_prompts)
-            # prompt = extra_prompts + prompt
 
         # if 'retriever' in kwargs:
         #     # retriever_prompt = "How would you treat the following injuries: {}".format(prompt)
@@ -1055,10 +1060,9 @@ def __call__(self, sample, target_kdma_values, **kwargs):
 
         #     prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}'
         # else:
-        prompt += f'\n{sample["probe"]}'
 
+        prompt += f'\n{sample["probe"]}'
         choices = sample['choices']
-
         labels = kwargs.get('labels', {})
 
         alignment_target = None

From d61baa9c914b7f4b4b5b3795747b2193b0b8edac Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Mon, 20 May 2024 17:43:41 -0400
Subject: [PATCH 10/11] Adding configs for different testing (high, low,
 incontext, and baseline) to run.  Also adding launch.json to help others with
 running on VScode

---
 .vscode/launch.json                           | 77 ++++++++++++++++++-
 .../single_kdma_adm_config_baseline.yml       | 17 ++++
 adm_configs/single_kdma_adm_config_high.yml   |  2 +-
 .../single_kdma_adm_config_high_incontext.yml |  7 +-
 adm_configs/single_kdma_adm_config_low.yml    |  2 +-
 .../single_kdma_adm_config_low_incontext.yml  |  5 +-
 6 files changed, 100 insertions(+), 10 deletions(-)
 create mode 100644 adm_configs/single_kdma_adm_config_baseline.yml

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 9170e854..93ace822 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -20,7 +20,8 @@
                     "--loglevel", "DEBUG",
                     "--logfile-path", "${workspaceFolder}/results/high_incontext/output.log",
                     "--save-input-output-to-path", "${workspaceFolder}/results/high_incontext/input-output.json",
-                    "--save-alignment-score-to-path", "${workspaceFolder}/results/high_incontext/output-scores.json"
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/high_incontext/output-scores.json",
+                    "--training-session"
                     ],
             "env":  {
                     "CUDA_VISIBLE_DEVICES": "1"
@@ -42,10 +43,80 @@
                     "--loglevel", "DEBUG",
                     "--logfile-path", "${workspaceFolder}/results/low_incontext/output.log",
                     "--save-input-output-to-path", "${workspaceFolder}/results/low_incontext/input-output.json",
-                    "--save-alignment-score-to-path", "${workspaceFolder}/results/low_incontext/output-scores.json"
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/low_incontext/output-scores.json",
+                    "--training-session"
                     ],
             "env":  {
-                    "CUDA_VISIBLE_DEVICES": "0"
+                    "CUDA_VISIBLE_DEVICES": "1"
+                    }
+        },
+        {
+            "name": "High Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_high.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/high/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/high/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/high/output-scores.json",
+                    "--training-session"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "2"
+                    }
+        },
+        {
+            "name": "Low Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_low.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/low/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/low/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/low/output-scores.json",
+                    "--training-session"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "3"
+                    }
+        },
+        {
+            "name": "Baseline Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_baseline.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/baseline/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/baseline/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/baseline/output-scores.json",
+                    "--training-session"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "3"
                     }
         }
     ]
diff --git a/adm_configs/single_kdma_adm_config_baseline.yml b/adm_configs/single_kdma_adm_config_baseline.yml
new file mode 100644
index 00000000..55fd28e2
--- /dev/null
+++ b/adm_configs/single_kdma_adm_config_baseline.yml
@@ -0,0 +1,17 @@
+adm:
+  name: 'SingleKDMAADM'
+  init_kwargs:
+    hf_model: meta-llama/Llama-2-7b-chat-hf
+    precision: half
+    temperature: 0.7
+
+  inference_kwargs:
+    baseline: true
+    n_negative_samples: 0
+    n_positive_samples: 1
+    shuffle: true
+
+alignment_target_override:
+  id: ADEPT-metrics_eval-alignment-target-train-LOW
+  kdma_values:
+    - {kdma: MoralDesert, value: 0}
diff --git a/adm_configs/single_kdma_adm_config_high.yml b/adm_configs/single_kdma_adm_config_high.yml
index 384427f8..646c27c2 100644
--- a/adm_configs/single_kdma_adm_config_high.yml
+++ b/adm_configs/single_kdma_adm_config_high.yml
@@ -6,7 +6,7 @@ adm:
     temperature: 0.7
 
   inference_kwargs:
-    baseline: true
+    baseline: false
     n_negative_samples: 0
     n_positive_samples: 1
     shuffle: true
diff --git a/adm_configs/single_kdma_adm_config_high_incontext.yml b/adm_configs/single_kdma_adm_config_high_incontext.yml
index d564d779..51c9762b 100644
--- a/adm_configs/single_kdma_adm_config_high_incontext.yml
+++ b/adm_configs/single_kdma_adm_config_high_incontext.yml
@@ -6,14 +6,15 @@ adm:
     temperature: 0.7
 
   inference_kwargs:
-    baseline: true
+    baseline: false
     n_negative_samples: 0
     n_positive_samples: 1
     shuffle: true
     incontext: 
       number: 5
-      method: random
-      dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+      method: bert_similarity
+      # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+      dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json
 
 alignment_target_override:
   id: ADEPT-metrics_eval-alignment-target-train-HIGH
diff --git a/adm_configs/single_kdma_adm_config_low.yml b/adm_configs/single_kdma_adm_config_low.yml
index 55fd28e2..70a9d648 100644
--- a/adm_configs/single_kdma_adm_config_low.yml
+++ b/adm_configs/single_kdma_adm_config_low.yml
@@ -6,7 +6,7 @@ adm:
     temperature: 0.7
 
   inference_kwargs:
-    baseline: true
+    baseline: false
     n_negative_samples: 0
     n_positive_samples: 1
     shuffle: true
diff --git a/adm_configs/single_kdma_adm_config_low_incontext.yml b/adm_configs/single_kdma_adm_config_low_incontext.yml
index 90a65b35..e8fb6567 100644
--- a/adm_configs/single_kdma_adm_config_low_incontext.yml
+++ b/adm_configs/single_kdma_adm_config_low_incontext.yml
@@ -6,14 +6,15 @@ adm:
     temperature: 0.7
 
   inference_kwargs:
-    baseline: true
+    baseline: false
     n_negative_samples: 0
     n_positive_samples: 1
     shuffle: true
     incontext: 
       number: 5
       method: random
-      dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+      # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
+      dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json
 
 alignment_target_override:
   id: ADEPT-metrics_eval-alignment-target-train-LOW

From 6f2ee925844803a5a7a731e29a5603186e27475e Mon Sep 17 00:00:00 2001
From: Christopher <cfunk1210@gmail.com>
Date: Wed, 22 May 2024 13:47:00 -0400
Subject: [PATCH 11/11] Adding more configs

---
 .vscode/launch.json                           | 33 ++++++++++++++++---
 .../single_kdma_adm_config_high_baseline.yml  | 17 ++++++++++
 ...> single_kdma_adm_config_low_baseline.yml} |  0
 .../single_kdma_adm_config_low_incontext.yml  |  2 +-
 4 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 adm_configs/single_kdma_adm_config_high_baseline.yml
 rename adm_configs/{single_kdma_adm_config_baseline.yml => single_kdma_adm_config_low_baseline.yml} (100%)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 93ace822..3b2439cb 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -97,22 +97,45 @@
                     }
         },
         {
-            "name": "Baseline Adept",
+            "name": "High Baseline Adept",
             "type": "debugpy",
             "request": "launch",
             "console": "integratedTerminal",
             "module": "align_system.cli.run_align_system",
             "args": [
                     "TA3ActionBased",
-                    "--adm-config", "adm_configs/single_kdma_adm_config_baseline.yml",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_high_baseline.yml",
                     "--username", "kitware-single-kdma-adm-aligned-no-negatives",
                     "--align-to-target",
                     "--session-type", "adept",
                     "--api_endpoint", "http://127.0.0.1:8080",
                     "--loglevel", "DEBUG",
-                    "--logfile-path", "${workspaceFolder}/results/baseline/output.log",
-                    "--save-input-output-to-path", "${workspaceFolder}/results/baseline/input-output.json",
-                    "--save-alignment-score-to-path", "${workspaceFolder}/results/baseline/output-scores.json",
+                    "--logfile-path", "${workspaceFolder}/results/high_baseline/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/high_baseline/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/high_baseline/output-scores.json",
+                    "--training-session"
+                    ],
+            "env":  {
+                    "CUDA_VISIBLE_DEVICES": "3"
+                    }
+        },
+        {
+            "name": "Low Baseline Adept",
+            "type": "debugpy",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "align_system.cli.run_align_system",
+            "args": [
+                    "TA3ActionBased",
+                    "--adm-config", "adm_configs/single_kdma_adm_config_low_baseline.yml",
+                    "--username", "kitware-single-kdma-adm-aligned-no-negatives",
+                    "--align-to-target",
+                    "--session-type", "adept",
+                    "--api_endpoint", "http://127.0.0.1:8080",
+                    "--loglevel", "DEBUG",
+                    "--logfile-path", "${workspaceFolder}/results/low_baseline/output.log",
+                    "--save-input-output-to-path", "${workspaceFolder}/results/low_baseline/input-output.json",
+                    "--save-alignment-score-to-path", "${workspaceFolder}/results/low_baseline/output-scores.json",
                     "--training-session"
                     ],
             "env":  {
diff --git a/adm_configs/single_kdma_adm_config_high_baseline.yml b/adm_configs/single_kdma_adm_config_high_baseline.yml
new file mode 100644
index 00000000..384427f8
--- /dev/null
+++ b/adm_configs/single_kdma_adm_config_high_baseline.yml
@@ -0,0 +1,17 @@
+adm:
+  name: 'SingleKDMAADM'
+  init_kwargs:
+    hf_model: meta-llama/Llama-2-7b-chat-hf
+    precision: half
+    temperature: 0.7
+
+  inference_kwargs:
+    baseline: true
+    n_negative_samples: 0
+    n_positive_samples: 1
+    shuffle: true
+
+alignment_target_override:
+  id: ADEPT-metrics_eval-alignment-target-train-HIGH
+  kdma_values:
+    - {kdma: MoralDesert, value: 1}
diff --git a/adm_configs/single_kdma_adm_config_baseline.yml b/adm_configs/single_kdma_adm_config_low_baseline.yml
similarity index 100%
rename from adm_configs/single_kdma_adm_config_baseline.yml
rename to adm_configs/single_kdma_adm_config_low_baseline.yml
diff --git a/adm_configs/single_kdma_adm_config_low_incontext.yml b/adm_configs/single_kdma_adm_config_low_incontext.yml
index e8fb6567..a23452cb 100644
--- a/adm_configs/single_kdma_adm_config_low_incontext.yml
+++ b/adm_configs/single_kdma_adm_config_low_incontext.yml
@@ -12,7 +12,7 @@ adm:
     shuffle: true
     incontext: 
       number: 5
-      method: random
+      method: bert_similarity
       # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json
       dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json