diff --git a/benchtools/benchmark.py b/benchtools/benchmark.py index fb1607d..ca571f4 100644 --- a/benchtools/benchmark.py +++ b/benchtools/benchmark.py @@ -123,7 +123,7 @@ def from_folders(cls, bench_path): for task_dir in task_list: # load the tasks task_path = os.path.join(task_folder, task_dir) - task = Task.from_txt_csv(task_path) + task = Task.from_txt_csv(task_path, source_path=bench_path) tasks.append(task) else: tasks = [] diff --git a/benchtools/runner.py b/benchtools/runner.py index c8a3c64..6426054 100644 --- a/benchtools/runner.py +++ b/benchtools/runner.py @@ -27,7 +27,8 @@ def __init__(self, runner_type='ollama', model='gemma3:1b', api=None): self.model = model api_default = {'ollama_api': "http://localhost:11434", 'openai':"https://api.openai.com/v1", - 'ollama':""} + 'ollama':"", + 'bedrock':""} if api: self.api = api else: diff --git a/benchtools/task.py b/benchtools/task.py index 2707c00..065fc4a 100644 --- a/benchtools/task.py +++ b/benchtools/task.py @@ -170,10 +170,18 @@ def from_txt_csv(cls, task_path, task_name = None, with open(os.path.join(task_path, "template.txt"), "r") as f: prompt = f.read() + info_file = os.path.join(task_path,'info.yml') + if os.path.exists(info_file): + with open(info_file, "r") as f: + info_dict = yaml.safe_load(f) + else: + info_dict= {} + values_file = os.path.join(task_path, "values.csv") # load and strip whitespace from column names and values value_answer_df = pd.read_csv(values_file).rename(columns=lambda x: x.strip()).applymap(lambda x: x.strip() if isinstance(x, str) else x) + # TODO: Check info_dict for calculated? 
if 'reference' in value_answer_df.columns: variant_values = value_answer_df.drop(columns='reference').to_dict(orient='records') @@ -193,14 +201,6 @@ def from_txt_csv(cls, task_path, task_name = None, else: description = f"a template based task with template: {prompt} and values like:\n\n {value_answer_df.head().to_markdown()}" - info_file = os.path.join(task_path,'task.yml') - if os.path.exists(info_file): - with open(info_file, "r") as f: - info_dict = yaml.safe_load(f) - else: - info_dict= {} - - return cls(task_name, template= prompt, variant_values = variant_values, description = description, diff --git a/demos/folderbench/multiple_models.yml b/demos/folderbench/multiple_models.yml index fbb7d54..33b6d91 100644 --- a/demos/folderbench/multiple_models.yml +++ b/demos/folderbench/multiple_models.yml @@ -1,4 +1,4 @@ -runner_type: ollama +runner_type: bedrock model: - - 'llama3.2' - - 'gemma3' + - 'meta.llama3-70b-instruct-v1:0' + - 'google.gemma-3-4b-it'