Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added encoders and parsers for CREST to support energy, gradient, hessian, and optimization calculations #19

Merged
merged 1 commit into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
"qcparse",
"rotamer",
"rotamers",
"runtypes",
"singlepoint",
"spinmult",
"tcin",
"tcout",
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [unreleased]

### Added

- Encoders and parsers for CREST to support `energy`, `gradient`, `hessian`, and `optimization` calculations.

## [0.6.3] - 2024-09-12

### Added
Expand Down
60 changes: 56 additions & 4 deletions qcparse/encoders/crest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from qcparse.exceptions import EncoderError
from qcparse.models import NativeInput

SUPPORTED_CALCTYPES = {CalcType.conformer_search}
SUPPORTED_CALCTYPES = {
CalcType.conformer_search,
CalcType.optimization,
CalcType.energy,
CalcType.gradient,
CalcType.hessian,
}


def encode(inp_obj: ProgramInput) -> NativeInput:
Expand Down Expand Up @@ -42,13 +48,47 @@ def validate_input(inp_obj: ProgramInput):
"""
# These values come from other parts of the ProgramInput and should not be set
# in the keywords.
non_allowed_keywords = ["charge", "uhf", "runtype"]
non_allowed_keywords = ["charge", "uhf"]
for keyword in non_allowed_keywords:
if keyword in inp_obj.keywords:
raise EncoderError(
f"{keyword} should not be set in keywords for CREST. It is already set "
"on the Structure or ProgramInput elsewhere.",
)
if "runtype" in inp_obj.keywords:
_validate_runtype_calctype(inp_obj.keywords["runtype"], inp_obj.calctype)


def _validate_runtype_calctype(runtype: str, calctype: CalcType):
    """Check that a user-supplied CREST runtype is compatible with the calctype.

    Args:
        runtype: The value of the 'runtype' keyword.
        calctype: The calctype of the calculation the runtype is used for.

    Raises:
        EncoderError: If the calctype is one of the calctypes handled here and the
            runtype is not in that calctype's set of valid runtypes.
    """
    # Select the runtypes that are acceptable for the requested calctype.
    if calctype == CalcType.conformer_search:
        allowed = {"imtd-gc", "imtd-smtd", "entropy", "nci", "nci-mtd"}
    elif calctype == CalcType.optimization:
        allowed = {"optimize", "ancopt"}
    elif calctype in {CalcType.energy, CalcType.gradient}:
        allowed = {"singlepoint"}
    elif calctype == CalcType.hessian:
        allowed = {"numhess"}
    else:
        # Calctypes not listed above are not checked by this function.
        return

    if runtype in allowed:
        return

    raise EncoderError(
        f"Unsupported runtype {runtype} for calctype {calctype}. Valid runtypes "
        f"are: {allowed}.",
    )


def _to_toml_dict(inp_obj: ProgramInput, struct_filename: str) -> Dict[str, Any]:
Expand All @@ -64,8 +104,20 @@ def _to_toml_dict(inp_obj: ProgramInput, struct_filename: str) -> Dict[str, Any]
toml_dict.setdefault("threads", os.cpu_count())
toml_dict["input"] = struct_filename

# TODO: May need to deal with non-covalent mode at some point
toml_dict["runtype"] = "imtd-gc"
# Set default runtype if not already set
if "runtype" not in inp_obj.keywords:
if inp_obj.calctype == CalcType.conformer_search:
toml_dict["runtype"] = "imtd-gc"
elif inp_obj.calctype == CalcType.optimization:
toml_dict["runtype"] = "optimize"
elif inp_obj.calctype in {CalcType.energy, CalcType.gradient}:
toml_dict["runtype"] = "singlepoint"
elif inp_obj.calctype == CalcType.hessian:
toml_dict["runtype"] = "numhess"
else:
raise EncoderError(
f"Unsupported calctype {inp_obj.calctype} for CREST encoder.",
)

# Calculation level keywords
calculation = toml_dict.pop("calculation", {})
Expand Down
153 changes: 151 additions & 2 deletions qcparse/parsers/crest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
import re
from pathlib import Path
from typing import List, Optional, Union
from typing import Any, Dict, List, Optional, Union

import numpy as np
from qcio import ConformerSearchResults, Structure
from qcio import (
CalcType,
ConformerSearchResults,
OptimizationResults,
ProgramInput,
ProgramOutput,
Provenance,
SinglePointResults,
Structure,
)

from .utils import regex_search

Expand Down Expand Up @@ -87,3 +97,142 @@ def parse_conformer_search_dir(
rotamers=rotamers,
rotamer_energies=np.array(rotamer_energies),
)


def parse_energy_grad(text: str) -> SinglePointResults:
    """Parse the output of a CREST energy and gradient calculation.

    The text is searched for a '# Energy ( Eh )' header followed by a comment line
    and a single value, and for a '# Gradient ( Eh/a0 )' header followed by a
    comment line and one gradient component per line.

    Args:
        text: The text of the output file.

    Returns:
        The parsed energy and gradient as a SinglePointResults object. The gradient
        is passed to SinglePointResults as a flat array in file order.
    """
    energy_regex = r"# Energy \( Eh \)\n#*\n\s*([-\d.]+)"
    # Each component normally ends with a newline, but the last one may end at
    # end-of-text when the file has no trailing newline. Accept both so the final
    # component is not silently dropped.
    gradient_regex = r"# Gradient \( Eh/a0 \)\n#\s*\n((?:\s*[-\d.]+(?:\n|\Z))+)"

    energy = float(regex_search(energy_regex, text).group(1))
    gradient = np.array(
        [float(x) for x in regex_search(gradient_regex, text).group(1).split()]
    )
    return SinglePointResults(
        energy=energy,
        gradient=gradient,
    )


def parse_singlepoint_dir(
    directory: Union[Path, str], filename: str = "crest.engrad"
) -> SinglePointResults:
    """Read a CREST single point results file from a directory and parse it.

    Args:
        directory: Path to the directory containing the CREST output files.
        filename: The name of the file containing the single point results.
            Default is 'crest.engrad'.

    Returns:
        The parsed single point results as a SinglePointResults object.
    """
    engrad_path = Path(directory) / filename
    return parse_energy_grad(engrad_path.read_text())


def parse_numhess_dir(
    directory: Union[Path, str],
    filename: str = "numhess1",
    stdout: Optional[str] = None,
) -> SinglePointResults:
    """Parse the output directory of a CREST numerical Hessian calculation.

    Args:
        directory: Path to the directory containing the CREST output files.
        filename: The name of the file containing the numerical Hessian results.
            Default is 'numhess1'.
        stdout: The stdout from CREST. If given (and non-empty), the energy is
            parsed from it and included in the results.

    Returns:
        The parsed numerical Hessian results as a SinglePointResults object. The
        Hessian values are passed to SinglePointResults as a flat array in file
        order.
    """
    data = (Path(directory) / filename).read_text()
    # The alternation is grouped so the optional sign applies to integer-valued
    # entries as well as decimals (ungrouped, "-5" would be read as "5"). An
    # optional exponent is accepted so "1.0E-05" is one number rather than two.
    float_regex = r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?"
    numbers = re.findall(float_regex, data)
    array = np.array(numbers, dtype=float)
    spr_dict: Dict[str, Any] = {"hessian": array}
    if stdout:
        energy_regex = r"Energy\s*=\s*([-+]?\d+\.\d+)\s*Eh"
        energy = float(regex_search(energy_regex, stdout).group(1))
        spr_dict["energy"] = energy
    return SinglePointResults(**spr_dict)


def parse_optimization_dir(
    directory: Union[Path, str],
    *,
    inp_obj: ProgramInput,
    stdout: str,
) -> OptimizationResults:
    """Parse the output directory of a CREST optimization calculation.

    The trajectory is read from 'crestopt.log' in the directory. Every step is
    given a zero placeholder gradient because CREST does not output per-step
    gradients; the last step's gradient is then overwritten with the gradient
    parsed from the single point results file, if that file exists.

    Args:
        directory: Path to the directory containing the CREST output files.
        inp_obj: The qcio ProgramInput object for the optimization.
        stdout: The stdout from CREST.

    Returns:
        The parsed optimization results as an OptimizationResults object.
    """
    # Read in the xyz file containing the trajectory
    directory = Path(directory)
    xyz_text = (directory / "crestopt.log").read_text()

    # Parse structures and energies from the xyz file
    structures = Structure.from_xyz_multi(
        xyz_text,
        charge=inp_obj.structure.charge,
        multiplicity=inp_obj.structure.multiplicity,
    )
    # The energy is the second whitespace-separated token of each xyz comment
    # line, e.g. "Etot= -4.7918798035".
    energies = [
        float(struct.extras[Structure._xyz_comment_key][1]) for struct in structures
    ]

    # Size of the placeholder gradient used because CREST does not output
    # per-step gradients.
    n_gradient_values = len(inp_obj.structure.symbols) * 3

    # Parse program version
    program_version = parse_version_string(stdout)

    # Collect final gradient if calculation succeeded
    try:
        final_spr = parse_singlepoint_dir(directory)
    except FileNotFoundError:
        # Calculation failed, so we don't have the final energy or gradient
        final_spr = SinglePointResults(gradient=np.zeros(n_gradient_values))

    # Create the optimization trajectory. Each step gets its OWN zero array: the
    # last step's gradient is overwritten in place below, and a shared array could
    # leak that write into every other step if the results model keeps a view of
    # the array it was given.
    trajectory: List[ProgramOutput] = [
        ProgramOutput(
            input_data=ProgramInput(
                calctype=CalcType.gradient,
                structure=struct,
                model=inp_obj.model,
            ),
            success=True,
            results=SinglePointResults(
                energy=energy, gradient=np.zeros(n_gradient_values)
            ),
            provenance=Provenance(
                program="crest",
                program_version=program_version,
            ),
        )
        for struct, energy in zip(structures, energies)
    ]

    # Fill in final gradient
    # https://github.com/crest-lab/crest/issues/354
    trajectory[-1].results.gradient[:] = final_spr.gradient

    return OptimizationResults(
        trajectory=trajectory,
    )
20 changes: 20 additions & 0 deletions tests/data/crest_output/crest.engrad
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Atoms
#
3
#
# Energy ( Eh )
#
-0.335557824179335
#
# Gradient ( Eh/a0 )
#
-0.005962071557911
-0.004419818102026
0.003139227894649
0.003048425211480
0.001982394235964
-0.001779667371498
0.002913646346432
0.002437423866062
-0.001359560523152
65 changes: 65 additions & 0 deletions tests/data/crest_output/crestopt.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
3
Etot= -4.7918798035
O -0.0934852751 -0.0692099762 0.0488995019
H 0.4307247549 1.6181264838 0.5296458319
H 1.0532005349 -0.5195317162 -1.3058451581
3
Etot= -4.9229264187
O 0.1136777730 0.0841383057 -0.0594737049
H 0.3707615594 1.3916895508 0.4552315157
H 0.9060006825 -0.4464430651 -1.1230576351
3
Etot= -5.0483521241
O 0.2573182669 0.1904912574 -0.1346012710
H 0.3599743190 1.1289027403 0.3128185924
H 0.7731474289 -0.2900092063 -0.9055171457
3
Etot= -5.0170597610
O 0.1917723991 0.1420049385 -0.1002932645
H 0.4678576011 0.8951550442 0.0741564841
H 0.7308100147 -0.0077751914 -0.7011630439
3
Etot= -5.0491088307
O 0.0623819936 0.0460852580 -0.0326872105
H 0.5219250724 0.9797836792 0.0717912538
H 0.8061329488 0.0035158541 -0.7664038677
3
Etot= -5.0660326493
O 0.1987775715 0.1472783377 -0.1039067694
H 0.4093039819 1.0814532336 0.2382238721
H 0.7823584615 -0.1993467800 -0.8616169270
3
Etot= -5.0730217268
O 0.1770402418 0.1302188641 -0.0930982848
H 0.4376238607 1.0313507655 0.1821152037
H 0.7757759123 -0.1321848383 -0.8163167432
3
Etot= -5.0723078880
O 0.1736292273 0.1445536976 -0.0815172819
H 0.4380419413 1.0059759479 0.1669722835
H 0.7787688463 -0.1211448541 -0.8127548260
3
Etot= -5.0683269168
O 0.1851362467 0.1179180760 -0.1079633483
H 0.4523839201 0.9974657046 0.1483557889
H 0.7529198480 -0.0859989893 -0.7676922650
3
Etot= -5.0732163707
O 0.1760746513 0.1281388107 -0.0933864784
H 0.4401853431 1.0270846083 0.1771945599
H 0.7741800205 -0.1258386276 -0.8111079058
3
Etot= -5.0733841586
O 0.1769285244 0.1328773064 -0.0914470659
H 0.4405581451 1.0164576782 0.1706642632
H 0.7729533454 -0.1199501932 -0.8065170216
3
Etot= -5.0734021646
O 0.1782533830 0.1321019587 -0.0931605147
H 0.4400623060 1.0186302961 0.1723993514
H 0.7721243259 -0.1213474635 -0.8065386611
3
Etot= -5.0734025156
O 0.1788766949 0.1323436930 -0.0936142242
H 0.4397631667 1.0187613429 0.1727606527
H 0.7718001532 -0.1217202445 -0.8064462529
20 changes: 20 additions & 0 deletions tests/data/crest_output/numhess1
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
$hessian
0.02040569 -0.00018059 0.02080099 -0.02081319 0.01511689
0.00867078 0.00037976 -0.01495837 -0.02946283
-0.00018059 -0.01341723 -0.03209513 0.01368595 0.03374600
0.01874084 -0.01351862 -0.02035995 0.01336374
0.02080099 -0.03209513 0.00327178 0.00784908 0.01737681
-0.01812512 -0.02863169 0.01472059 0.01485103
-0.02081319 0.01368595 0.00784908 0.01933555 -0.01625843
-0.00694960 0.00149263 0.00258608 -0.00090575
0.01511689 0.03374600 0.01737681 -0.01625843 -0.03409225
-0.01710500 0.00114214 0.00035657 -0.00027546
0.00867078 0.01874084 -0.01812512 -0.00694960 -0.01710500
0.01843539 -0.00173455 -0.00164242 -0.00030677
0.00037976 -0.01351862 -0.02863169 0.00149263 0.00114214
-0.00173455 -0.00185964 0.01238496 0.03036359
-0.01495837 -0.02035995 0.01472059 0.00258608 0.00035657
-0.00164242 0.01238496 0.02002423 -0.01308397
-0.02946283 0.01336374 0.01485103 -0.00090575 -0.00027546
-0.00030677 0.03036359 -0.01308397 -0.01454546
$end
Loading
Loading