From e3a10fa1bece3f6cd54ac3117155541beee14f0c Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Wed, 31 Dec 2025 22:48:23 +0200 Subject: [PATCH 1/3] Formats ORCA method and basis set strings Normalizes method strings to ORCA-friendly labels and addresses the wb97xd deprecation by suggesting alternatives. Formats basis set strings to ORCA formatting (e.g., def2tzvp -> def2-tzvp). Fixed the opt and fine opt keywords to be more in line with 5.0.4 - 6.0.0 --- arc/job/adapters/orca.py | 54 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/arc/job/adapters/orca.py b/arc/job/adapters/orca.py index d64340af7b..26e01255d9 100644 --- a/arc/job/adapters/orca.py +++ b/arc/job/adapters/orca.py @@ -32,6 +32,50 @@ logger = get_logger() +ORCA_METHOD_ALIASES = { + 'wb97xd3': 'wb97x-d3', +} + + +def _format_orca_method(method: str) -> str: + """ + Convert ARC method names to ORCA-friendly labels when needed. + """ + if not method: + return method + if method.lower() == 'wb97xd': + logger.warning('ORCA does not support wb97xd; use wb97x or wb97x-d3.') + return ORCA_METHOD_ALIASES.get(method.lower(), method) + + +def _format_orca_basis_token(token: str) -> str: + """ + Convert def2 basis tokens to ORCA formatting (e.g., def2tzvp -> def2-tzvp). + """ + if not token: + return token + parts = token.split('/') + base = parts[0] + if base.lower().startswith('def2'): + base_rest = base[4:] + if base_rest.startswith('-'): + base_rest = base_rest[1:] + if base_rest: + base = f"def2-{base_rest.lower()}" + if len(parts) > 1: + parts = [base] + [part.lower() for part in parts[1:]] + return '/'.join(parts) + return base + + +def _format_orca_basis(basis: str) -> str: + """ + Convert basis strings to ORCA-friendly labels where applicable. 
+ """ + if not basis: + return basis + return ' '.join(_format_orca_basis_token(token) for token in basis.split()) + default_job_settings, global_ess_settings, input_filenames, output_filenames, servers, submit_filenames = \ settings['default_job_settings'], settings['global_ess_settings'], settings['input_filenames'], \ settings['output_filenames'], settings['servers'], settings['submit_filenames'] @@ -219,13 +263,13 @@ def write_input_file(self) -> None: 'keywords', ]: input_dict[key] = '' - input_dict['auxiliary_basis'] = self.level.auxiliary_basis or '' - input_dict['basis'] = self.level.basis or '' + input_dict['auxiliary_basis'] = _format_orca_basis(self.level.auxiliary_basis or '') + input_dict['basis'] = _format_orca_basis(self.level.basis or '') input_dict['charge'] = self.charge input_dict['cpus'] = self.cpu_cores input_dict['label'] = self.species_label input_dict['memory'] = self.input_file_memory - input_dict['method'] = self.level.method + input_dict['method'] = _format_orca_method(self.level.method) input_dict['multiplicity'] = self.multiplicity input_dict['xyz'] = xyz_to_str(self.xyz) @@ -241,9 +285,9 @@ def write_input_file(self) -> None: input_dict['method_class'] = 'KS' # DFT grid must be the same for both opt and freq if self.fine: - self.add_to_args(val='Grid6 NoFinalGrid', key1='keyword') + self.add_to_args(val='defgrid3', key1='keyword') else: - self.add_to_args(val='Grid5 NoFinalGrid', key1='keyword') + self.add_to_args(val='defgrid2', key1='keyword') elif self.level.method_type == 'wavefunction': input_dict['method_class'] = 'HF' if 'dlpno' in self.level.method: From 31980488648be823a754c61071b77fdac3ca0a97 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Wed, 31 Dec 2025 22:48:40 +0200 Subject: [PATCH 2/3] Adds tests for ORCA formatting helpers Adds unit tests for the ORCA method, basis set token, and basis set formatting helper functions. 
These tests ensure the correct conversion of basis set names and methods to the format expected by ORCA. --- arc/job/adapters/orca_test.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arc/job/adapters/orca_test.py b/arc/job/adapters/orca_test.py index e8663c85b7..c2fd561aff 100644 --- a/arc/job/adapters/orca_test.py +++ b/arc/job/adapters/orca_test.py @@ -12,7 +12,11 @@ import unittest from arc.common import ARC_PATH -from arc.job.adapters.orca import OrcaAdapter +from arc.job.adapters.orca import (OrcaAdapter, + _format_orca_basis, + _format_orca_basis_token, + _format_orca_method, + ) from arc.level import Level from arc.settings.settings import input_filenames, output_filenames from arc.species import ARCSpecies @@ -173,6 +177,28 @@ def test_write_input_file_with_CPCM_solvation(self): """ self.assertEqual(content_3, job_3_expected_input_file) + def test_format_orca_method(self): + """Test ORCA method formatting helper.""" + self.assertEqual(_format_orca_method('wb97xd3'), 'wb97x-d3') + self.assertEqual(_format_orca_method('wb97xd'), 'wb97xd') + self.assertEqual(_format_orca_method('B3LYP'), 'B3LYP') + + def test_format_orca_basis_token(self): + """Test ORCA basis token formatting helper.""" + self.assertEqual(_format_orca_basis_token('def2tzvp'), 'def2-tzvp') + self.assertEqual(_format_orca_basis_token('def2-TZVP'), 'def2-tzvp') + self.assertEqual(_format_orca_basis_token('def2tzvp/c'), 'def2-tzvp/c') + self.assertEqual(_format_orca_basis_token('def2-TZVP/C'), 'def2-tzvp/c') + self.assertEqual(_format_orca_basis_token('cc-pvtz'), 'cc-pvtz') + + def test_format_orca_basis(self): + """Test ORCA basis formatting helper.""" + self.assertEqual(_format_orca_basis('def2tzvp'), 'def2-tzvp') + self.assertEqual(_format_orca_basis('def2-TZVP'), 'def2-tzvp') + self.assertEqual(_format_orca_basis('def2tzvp/c'), 'def2-tzvp/c') + self.assertEqual(_format_orca_basis('def2tzvp def2tzvp/c'), + 'def2-tzvp def2-tzvp/c') + def 
test_set_files(self): """Test setting files""" job_1_files_to_upload = [{'file_name': 'submit.sub', From 92ecf4197b84869289d81fbe3d506b9dadf59bfd Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Wed, 31 Dec 2025 22:48:59 +0200 Subject: [PATCH 3/3] Fixes ZPE extraction when 'Eh' is present Addresses an issue where the zero-point energy (ZPE) extraction fails when the "Eh" unit is explicitly present in the line. Improves the parsing logic to correctly identify and extract the ZPE value, regardless of whether the "Eh" unit is present or not. --- arc/parser/adapters/orca.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arc/parser/adapters/orca.py b/arc/parser/adapters/orca.py index 0d681f1a6c..ebe7b3b4e9 100644 --- a/arc/parser/adapters/orca.py +++ b/arc/parser/adapters/orca.py @@ -224,7 +224,11 @@ def parse_zpe_correction(self) -> Optional[float]: if 'Zero point energy' in line: # Example: Zero point energy ... 0.025410 Eh try: - zpe = float(line.split()[-2]) + parts = line.split() + if 'Eh' in parts: + zpe = float(parts[parts.index('Eh') - 1]) + else: + zpe = float(parts[-2]) break except (ValueError, IndexError): continue