diff --git a/bin/neuralxc b/bin/neuralxc
index b1de2f8..9263672 100644
--- a/bin/neuralxc
+++ b/bin/neuralxc
@@ -44,7 +44,7 @@ if __name__ == '__main__':
     adddat.set_defaults(func=add_data_driver)
 
     def inspectdat_driver(args):
-        subprocess.Popen('h5dump -n ' + args.hdf5, shell=True)
+        subprocess.Popen(f'h5dump -n {args.hdf5}', shell=True)
 
     #================ Inspect data ================
     inspectdat = datsub.add_parser('inspect', description='Inspect data in hdf5 file')
diff --git a/devtools/scripts/create_conda_env.py b/devtools/scripts/create_conda_env.py
index 9668d3b..292af33 100644
--- a/devtools/scripts/create_conda_env.py
+++ b/devtools/scripts/create_conda_env.py
@@ -57,7 +57,7 @@ def temp_cd():
 with open(args.conda_file, "r") as handle:
     yaml_script = loader(handle.read())
 
-python_replacement_string = "python {}*".format(args.python)
+python_replacement_string = f"python {args.python}*"
 
 try:
     for dep_index, dep_value in enumerate(yaml_script['dependencies']):
@@ -79,14 +79,17 @@ def temp_cd():
 if conda_path is None:
     raise RuntimeError("Could not find a conda binary in CONDA_EXE variable or in executable search path")
 
-print("CONDA ENV NAME {}".format(args.name))
-print("PYTHON VERSION {}".format(args.python))
-print("CONDA FILE NAME {}".format(args.conda_file))
-print("CONDA PATH {}".format(conda_path))
+print(f"CONDA ENV NAME {args.name}")
+print(f"PYTHON VERSION {args.python}")
+print(f"CONDA FILE NAME {args.conda_file}")
+print(f"CONDA PATH {conda_path}")
 
 # Write to a temp directory which will always be cleaned up
 with temp_cd():
     temp_file_name = "temp_script.yaml"
     with open(temp_file_name, 'w') as f:
         f.write(yaml.dump(yaml_script))
-    sp.call("{} env create -n {} -f {}".format(conda_path, args.name, temp_file_name), shell=True)
+    sp.call(
+        f"{conda_path} env create -n {args.name} -f {temp_file_name}",
+        shell=True,
+    )
diff --git a/examples/scripts/apply_subset.py b/examples/scripts/apply_subset.py
index d06d8b2..355d417 100644
--- a/examples/scripts/apply_subset.py
+++ b/examples/scripts/apply_subset.py
@@ -27,16 +27,14 @@ def get_structures_energies(path, unit=1):
 
 if __name__ == '__main__':
 
-    if len(sys.argv) == 5:
-        unit = float(sys.argv[4])
-    else:
-        unit = kcalpmol
-
+    unit = float(sys.argv[4]) if len(sys.argv) == 5 else kcalpmol
     print("using unit", unit)
     atoms = get_structures_energies(sys.argv[1], unit=unit)
-    if not sys.argv[2] == 'all':
-        subset = np.genfromtxt(sys.argv[2]).astype(int)
-    else:
-        subset = np.arange(len(atoms))
+    subset = (
+        np.arange(len(atoms))
+        if sys.argv[2] == 'all'
+        else np.genfromtxt(sys.argv[2]).astype(int)
+    )
+
     atoms_subset = [atoms[s] for s in subset]
     write(sys.argv[3], atoms_subset)
diff --git a/examples/scripts/fix_paths.py b/examples/scripts/fix_paths.py
index 7889258..451876c 100644
--- a/examples/scripts/fix_paths.py
+++ b/examples/scripts/fix_paths.py
@@ -2,11 +2,7 @@
 import os
 import sys
 
-if len(sys.argv) > 1:
-    path = sys.argv[1]
-else:
-    path = 'basis_sgdml_benzene.json'
-
+path = sys.argv[1] if len(sys.argv) > 1 else 'basis_sgdml_benzene.json'
 print('FILEPATH', path)
 
 basis = json.load(open(path, 'r'))
diff --git a/neuralxc/_version.py b/neuralxc/_version.py
index 1dc2b7f..030095b 100644
--- a/neuralxc/_version.py
+++ b/neuralxc/_version.py
@@ -24,8 +24,7 @@ def get_keywords():
     git_refnames = "$Format:%d$"
     git_full = "$Format:%H$"
     git_date = "$Format:%ci$"
-    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
-    return keywords
+    return {"refnames": git_refnames, "full": git_full, "date": git_date}
 
 
 class VersioneerConfig:
@@ -85,20 +84,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=
             if e.errno == errno.ENOENT:
                 continue
             if verbose:
-                print("unable to run %s" % dispcmd)
+                print(f"unable to run {dispcmd}")
                 print(e)
             return None, None
     else:
         if verbose:
-            print("unable to find command, tried %s" % (commands, ))
+            print(f"unable to find command, tried {commands}")
         return None, None
     stdout = p.communicate()[0].strip()
     if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
-            print("unable to run %s (error)" % dispcmd)
-            print("stdout was %s" % stdout)
+            print(f"unable to run {dispcmd} (error)")
+            print(f"stdout was {stdout}")
         return None, p.returncode
     return stdout, p.returncode
@@ -112,7 +111,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
     """
     rootdirs = []
 
-    for i in range(3):
+    for _ in range(3):
         dirname = os.path.basename(root)
         if dirname.startswith(parentdir_prefix):
             return {
@@ -122,12 +121,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
                 "error": None,
                 "date": None
             }
-        else:
-            rootdirs.append(root)
-            root = os.path.dirname(root)  # up a level
+        rootdirs.append(root)
+        root = os.path.dirname(root)  # up a level
 
     if verbose:
-        print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix))
+        print(
+            f"Tried directories {rootdirs} but none started with prefix {parentdir_prefix}"
+        )
+
     raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
 
 
@@ -140,21 +141,17 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-            if line.strip().startswith("git_date ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["date"] = mo.group(1)
-        f.close()
+        with open(versionfile_abs, "r") as f:
+            for line in f:
+                if line.strip().startswith("git_refnames ="):
+                    if mo := re.search(r'=\s*"(.*)"', line):
+                        keywords["refnames"] = mo[1]
+                if line.strip().startswith("git_full ="):
+                    if mo := re.search(r'=\s*"(.*)"', line):
+                        keywords["full"] = mo[1]
+                if line.strip().startswith("git_date ="):
+                    if mo := re.search(r'=\s*"(.*)"', line):
+                        keywords["date"] = mo[1]
     except EnvironmentError:
         pass
     return keywords
@@ -179,11 +176,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
         if verbose:
             print("keywords are unexpanded, not using")
         raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
     # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
     # just "foo-1.0". If we see a "tag: " prefix, prefer those.
     TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
     if not tags:
         # Either we're using git < 1.8.3, or there really are no tags. We use
         # a heuristic: assume all version tags have a digit. The old git %d
@@ -192,17 +189,17 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
         # expansion behaves like git log --decorate=short and strips out the
         # refs/heads/ and refs/tags/ prefixes that would let us distinguish
         # between branches and tags. By ignoring refnames without digits, we
         # filter out many common branch names like "release" and
         # "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) + print(f'likely tags: {",".join(sorted(tags))}') for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: - print("picking %s" % r) + print(f"picking {r}") return { "version": r, "full-revisionid": keywords["full"].strip(), @@ -230,21 +227,29 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - + GITS = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: - print("Directory %s not under git control" % root) + print(f"Directory {root} not under git control") raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command( - GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", - "%s*" % tag_prefix], cwd=root) + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -254,11 +259,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - + pieces = {"long": full_out, "short": full_out[:7], "error": None} # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -280,7 +281,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): return pieces # tag - full_tag = mo.group(1) + full_tag = mo[1] if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" @@ -290,10 +291,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) + pieces["distance"] = int(mo[2]) # commit: short hex revision ID - pieces["short"] = mo.group(3) + pieces["short"] = mo[3] else: # HEX: no tags @@ -310,9 +311,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" + return "." 
if "+" in pieces.get("closest-tag", "") else "+" def render_pep440(pieces): @@ -329,13 +328,11 @@ def render_pep440(pieces): if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" + if pieces["dirty"]: + rendered += ".dirty" return rendered @@ -372,13 +369,13 @@ def render_pep440_post(pieces): if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] + rendered += f'g{pieces["short"]}' else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" - rendered += "+g%s" % pieces["short"] + rendered += f'+g{pieces["short"]}' return rendered @@ -394,13 +391,11 @@ def render_pep440_old(pieces): rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" + if pieces["dirty"]: + rendered += ".dev0" return rendered diff --git a/neuralxc/datastructures/hdf5.py b/neuralxc/datastructures/hdf5.py index 0a60663..b6df9ad 100644 --- a/neuralxc/datastructures/hdf5.py +++ b/neuralxc/datastructures/hdf5.py @@ -36,20 +36,18 @@ def add_species(file, system, traj_path=''): order = [system] cg = file #Current group - for idx, o in enumerate(order): - if not o in cg.keys(): - cg = cg.create_group(o) - else: - cg = cg[o] - - if not 'species' in cg.attrs: + for o in order: + cg = cg.create_group(o) if o not in cg.keys() else cg[o] + if 'species' not in cg.attrs: if not traj_path: raise Exception('Must provide a trajectory file to define species') - species = {} - for atoms in read(traj_path, ':'): - species[''.join(atoms.get_chemical_symbols())] = 0 - species = ''.join([key for key in species]) + species = { + ''.join(atoms.get_chemical_symbols()): 0 + for atoms in read(traj_path, ':') + } + + species = ''.join(list(species)) cg.attrs.update({'species': species}) @@ -78,16 +76,12 @@ def add_data(which, file, data, system, method, override=False, E0=None): """ order = [system, method] - if not which in ['energy', 'forces']: + if which not in ['energy', 'forces']: order.append('density') cg = file #Current group for idx, o in enumerate(order): - if not o in cg.keys(): - cg = cg.create_group(o) - else: - cg = cg[o] - + cg = cg.create_group(o) if o not in cg.keys() else cg[o] if which == 'energy': if E0 is None: cg.attrs.update({'E0': min(data)}) @@ -111,20 +105,26 @@ def create_dataset(): def merge_sets(file, datasets, density_key=None, new_name='merged', E0={}): - energies = [file[data + '/energy'][:] for data in datasets] + energies = [file[f'{data}/energy'][:] for data in datasets] if not E0: energies = [e - nxc.ml.utils.find_attr_in_tree(file, data, 'E0') for e, data in zip(energies, datasets)] forces_found = True try: - forces = [file[data + '/forces'][:] for data in datasets] + forces = [file[f'{data}/forces'][:] for data in datasets] except KeyError: forces_found = False if density_key: - densities = [file[data + '/density/' + density_key][:] for data in datasets] + densities = [file[f'{data}/density/{density_key}'][:] for data in datasets] + + densities_full = np.zeros( + [ + sum(len(d) for d in densities), + sum(d.shape[1] for d in densities), + ] + ) - 
densities_full = np.zeros([sum([len(d) for d in densities]), sum([d.shape[1] for d in densities])]) line_mark = 0 col_mark = 0 for d in densities: @@ -133,7 +133,10 @@ def merge_sets(file, datasets, density_key=None, new_name='merged', E0={}): col_mark += d.shape[1] if forces_found: - forces_full = np.zeros([sum([len(d) for d in forces]), max([d.shape[1] for d in forces]), 3]) + forces_full = np.zeros( + [sum(len(d) for d in forces), max(d.shape[1] for d in forces), 3] + ) + line_mark = 0 for f in forces: @@ -143,8 +146,10 @@ def merge_sets(file, datasets, density_key=None, new_name='merged', E0={}): species = [neuralxc.ml.utils.find_attr_in_tree(file, data, 'species') for data in datasets] if E0: energies = [ - e - sum([s.count(element) * value for element, value in E0.items()]) for e, s in zip(energies, species) + e - sum(s.count(element) * value for element, value in E0.items()) + for e, s in zip(energies, species) ] + species = [''.join(species)] energies = np.concatenate(energies) @@ -159,11 +164,11 @@ def merge_sets(file, datasets, density_key=None, new_name='merged', E0={}): file.create_group(new_name) file[new_name].attrs.update({'species': species}) file[new_name].attrs.update({'E0': 0}) - file.create_dataset(new_name + '/energy', data=energies) + file.create_dataset(f'{new_name}/energy', data=energies) if forces_found: - file.create_dataset(new_name + '/forces', data=forces_full) + file.create_dataset(f'{new_name}/forces', data=forces_full) if density_key: - file.create_dataset(new_name + '/density/' + density_key, data=densities_full) + file.create_dataset(f'{new_name}/density/{density_key}', data=densities_full) def basis_to_hash(basis): diff --git a/neuralxc/drivers/data.py b/neuralxc/drivers/data.py index 20a0ba7..2120574 100644 --- a/neuralxc/drivers/data.py +++ b/neuralxc/drivers/data.py @@ -59,7 +59,7 @@ def obs(which, zero): add_species(file, system, traj) forces = [a.get_forces()\ for a in read(traj,':')] - max_na = max([len(f) for f in forces]) + max_na = max(len(f) for f in forces) forces_padded = np.zeros([len(forces), max_na, 3]) for idx, f in enumerate(forces): forces_padded[idx, :len(f)] = f @@ -75,7 +75,7 @@ def obs(which, zero): else: raise Exception('Option {} not recognized'.format(which)) - if density and not 'density' in add: + if density and 'density' not in add: add.append('density') for observable in add: obs(observable, zero) @@ -97,11 +97,11 @@ def merge_data_driver(file, base, ref, out, optE0=False, pre=''): print('Warning: E0 is not being optimzed for merged dataset. 
Might produce' +\ 'unexpected behavior') - merge_sets(datafile, base, basis_key, new_name=out + '/base', E0=E0) + merge_sets(datafile, base, basis_key, new_name=f'{out}/base', E0=E0) for key in E0: E0[key] = 0 - merge_sets(datafile, ref, None, new_name=out + '/ref', E0=E0) + merge_sets(datafile, ref, None, new_name=f'{out}/ref', E0=E0) def split_data_driver(hdf5, group, label, slice=':', comp=''): @@ -114,15 +114,15 @@ def split_data_driver(hdf5, group, label, slice=':', comp=''): ijk = bi_slice(i, j, k) root = group - if not root[0] == '/': root = '/' + root + if root[0] != '/': + root = f'/{root}' def collect_all_sets(file, path): sets = {} if isinstance(file[path], h5py._hl.dataset.Dataset): return {path: file[path]} - else: - for key in file[path]: - sets.update(collect_all_sets(file, path + '/' + key)) + for key in file[path]: + sets.update(collect_all_sets(file, f'{path}/{key}')) return sets all_sets = collect_all_sets(file, root) @@ -136,10 +136,10 @@ def collect_all_sets(file, path): elif new_len != length: raise Exception('Datasets contained in group dont have consistent lengths') idx = path.find(group) + len(group) - new_path = path[:idx] + '/' + label + path[idx:] + new_path = f'{path[:idx]}/{label}{path[idx:]}' if comp != '': idx = path.find(group) + len(group) - comp_path = path[:idx] + '/' + comp + path[idx:] + comp_path = f'{path[:idx]}/{comp}{path[idx:]}' comp_sets[comp_path] = all_sets[path][:].tolist() del comp_sets[comp_path][ijk] split_sets[new_path] = all_sets[path][ijk] @@ -161,7 +161,8 @@ def delete_data_driver(hdf5, group): """ Deletes data in hdf5 file""" file = h5py.File(hdf5, 'r+') root = group - if not root[0] == '/': root = '/' + root + if root[0] != '/': + root = f'/{root}' del file[root] @@ -175,7 +176,7 @@ def sample_driver(preprocessor, size, hdf5, dest='sample.npy', cutoff=0.0): basis_key = basis_to_hash(basis) data = load_sets(datafile, hdf5[1], hdf5[1], basis_key, cutoff) symmetrizer_instructions = {'symmetrizer_type': pre.get('symmetrizer_type', 'trace')} - symmetrizer_instructions.update({'basis': basis}) + symmetrizer_instructions['basis'] = basis species = [''.join(find_attr_in_tree(datafile, hdf5[1], 'species'))] sampler_pipeline = get_default_pipeline(basis, diff --git a/neuralxc/drivers/model.py b/neuralxc/drivers/model.py index 62b92b8..f49dda9 100644 --- a/neuralxc/drivers/model.py +++ b/neuralxc/drivers/model.py @@ -74,12 +74,10 @@ def parse_sets_input(path): with open(path, 'r') as setsfile: line = setsfile.readline().rstrip() hdf5[0] = line #datafile location - line = setsfile.readline().rstrip() - while (line): + while line := setsfile.readline().rstrip(): split = line.split() hdf5[1].append(split[0]) hdf5[2].append(split[1]) - line = setsfile.readline().rstrip() return hdf5 @@ -99,7 +97,7 @@ def serialize(in_path, jit_path, as_radial): model = xc.ml.network.load_pipeline(in_path) projector_type = model.get_basis_instructions().get('projector_type', 'ortho') if as_radial: - if not 'radial' in projector_type: + if 'radial' not in projector_type: projector_type += '_radial' if projector_type == 'pyscf_radial' or \ @@ -109,10 +107,9 @@ def serialize(in_path, jit_path, as_radial): model.basis_instructions = pyscf_to_gaussian_basis(model.basis_instructions) model.basis_instructions.update({'projector_type': projector_type}) - else: - if projector_type[-len('_radial'):] == '_radial': - projector_type = projector_type[:-len('_radial')] - model.basis_instructions.update({'projector_type': projector_type}) + elif projector_type[-len('_radial'):] == 
'_radial': + projector_type = projector_type[:-len('_radial')] + model.basis_instructions.update({'projector_type': projector_type}) xc.ml.pipeline.serialize_pipeline(model, jit_path, override=True) if model.get_basis_instructions().get('spec_agnostic', 'False'): with open(jit_path + '/AGN', 'w') as file: @@ -158,15 +155,11 @@ def sc_driver(xyz, model0 = os.path.abspath(model0) engine_kwargs = {'nxc': model0} - engine_kwargs.update(pre.get('engine', {})) + engine_kwargs |= pre.get('engine', {}) # If not nozero, automatically aligns energies between reference and # baseline data by removing mean deviation - if nozero: - E0 = 0 - else: - E0 = None - + E0 = 0 if nozero else None #============= Iteration 0 ================= # Initial self-consistent calculation either with model0 or baseline method only # if not neuralxc model provided. Hyperparameter optimization done in first fit @@ -181,9 +174,12 @@ def sc_driver(xyz, iteration = 0 if model0: - open('sets.inp', 'w').write('data.hdf5 \n *system/it{} \t system/ref'.format(iteration)) + open('sets.inp', 'w').write( + f'data.hdf5 \n *system/it{iteration} \t system/ref' + ) + else: - open('sets.inp', 'w').write('data.hdf5 \n system/it{} \t system/ref'.format(iteration)) + open('sets.inp', 'w').write(f'data.hdf5 \n system/it{iteration} \t system/ref') if sets: open('sets.inp', 'a').write('\n' + open(sets, 'r').read()) @@ -195,7 +191,13 @@ def sc_driver(xyz, kwargs=engine_kwargs) print('\nProjecting onto basis ...') print('-----------------------------\n') - pre_driver(xyz, 'workdir', preprocessor='pre.json', dest='data.hdf5/system/it{}'.format(iteration)) + pre_driver( + xyz, + 'workdir', + preprocessor='pre.json', + dest=f'data.hdf5/system/it{iteration}', + ) + add_data_driver(hdf5='data.hdf5', system='system', method='it0', @@ -206,9 +208,10 @@ def sc_driver(xyz, add_data_driver(hdf5='data.hdf5', system='system', method='ref', add=['energy'], traj=xyz, override=True, zero=E0) print('\nBaseline accuracy') print('-----------------------------\n') - statistics_sc = \ - eval_driver(hdf5=['data.hdf5','system/it{}'.format(iteration), - 'system/ref']) + statistics_sc = eval_driver( + hdf5=['data.hdf5', f'system/it{iteration}', 'system/ref'] + ) + open('statistics_sc', 'w').write(json.dumps(statistics_sc)) print('\nFitting initial ML model ...') @@ -224,24 +227,31 @@ def sc_driver(xyz, #=================== Iterations > 0 ============== it_label = 1 for it_label in range(1, maxit + 1): - if keep_itdata: - iteration = it_label - else: - iteration = 0 + iteration = it_label if keep_itdata else 0 + print(f'\n\n====== Iteration {it_label} ======') + open('sets.inp', 'w').write( + f'data.hdf5 \n *system/it{iteration} \t system/ref' + ) - print('\n\n====== Iteration {} ======'.format(it_label)) - open('sets.inp', 'w').write('data.hdf5 \n *system/it{} \t system/ref'.format(iteration)) if sets: open('sets.inp', 'a').write('\n' + open(sets, 'r').read()) mkdir('workdir') - shcopytreedel('best_model', 'model_it{}'.format(it_label)) - serialize('model_it{}'.format(it_label), 'model_it{}.jit'.format(it_label), - 'radial' in pre['preprocessor'].get('projector_type', 'ortho')) + shcopytreedel('best_model', f'model_it{it_label}') + serialize( + f'model_it{it_label}', + f'model_it{it_label}.jit', + 'radial' in pre['preprocessor'].get('projector_type', 'ortho'), + ) + + + engine_kwargs = { + 'nxc': f'../../model_it{it_label}.jit', + 'skip_calculated': False, + } - engine_kwargs = {'nxc': '../../model_it{}.jit'.format(it_label), 'skip_calculated': False} - 
engine_kwargs.update(pre.get('engine', {})) + engine_kwargs |= pre.get('engine', {}) print('\nRunning SCF calculations ...') print('-----------------------------\n') @@ -253,24 +263,38 @@ def sc_driver(xyz, print('\nProjecting onto basis...') print('-----------------------------\n') - pre_driver(xyz, 'workdir', preprocessor='pre.json', dest='data.hdf5/system/it{}'.format(iteration)) + pre_driver( + xyz, + 'workdir', + preprocessor='pre.json', + dest=f'data.hdf5/system/it{iteration}', + ) + + + add_data_driver( + hdf5='data.hdf5', + system='system', + method=f'it{iteration}', + add=['energy'], + traj='workdir/results.traj', + override=True, + zero=E0, + ) - add_data_driver(hdf5='data.hdf5', - system='system', - method='it{}'.format(iteration), - add=['energy'], - traj='workdir/results.traj', - override=True, - zero=E0) print('\nResults') print('-----------------------------\n') - statistics_sc = \ - eval_driver(hdf5=['data.hdf5','system/it{}'.format(iteration), - 'system/ref'], printout=False) + statistics_sc = eval_driver( + hdf5=['data.hdf5', f'system/it{iteration}', 'system/ref'], + printout=False, + ) + open('statistics_sc', 'a').write('\n' + json.dumps(statistics_sc)) - open('model_it{}/statistics_sc'.format(it_label), 'w').write('\n' + json.dumps(statistics_sc)) + open(f'model_it{it_label}/statistics_sc', 'w').write( + '\n' + json.dumps(statistics_sc) + ) + results_df = pd.DataFrame([json.loads(line) for line in open('statistics_sc','r')]) results_df.index.name = 'Iteration' print(results_df.to_markdown()) @@ -295,13 +319,13 @@ def sc_driver(xyz, mkdir('testing') shcopy('sc/data.hdf5'.format(iteration), 'testing/data.hdf5') - shcopytree('sc/model_it{}.jit'.format(it_label), 'testing/nxc.jit') - shcopytree('sc/model_it{}'.format(it_label), 'final_model/') - shcopytree('sc/model_it{}.jit'.format(it_label), 'final_model.jit/') + shcopytree(f'sc/model_it{it_label}.jit', 'testing/nxc.jit') + shcopytree(f'sc/model_it{it_label}', 'final_model/') + shcopytree(f'sc/model_it{it_label}.jit', 'final_model.jit/') os.chdir('testing') mkdir('workdir') engine_kwargs = {'nxc': '../../nxc.jit'} - engine_kwargs.update(pre.get('engine', {})) + engine_kwargs |= pre.get('engine', {}) driver(read(testfile, ':'), pre['preprocessor'].get('application', 'siesta'), workdir='workdir', @@ -373,21 +397,17 @@ def fit_driver(preprocessor, hyper, hdf5=None, sets='', sample='', cutoff=0.0, m for set in apply_to: selection = (data[:, 0] == set) prediction = new_model.predict(data)[set][:, 0] - print('Dataset {} old STD: {}'.format(set, np.std(data[selection][:, -1]))) + print(f'Dataset {set} old STD: {np.std(data[selection][:, -1])}') data[selection, -1] += prediction - print('Dataset {} new STD: {}'.format(set, np.std(data[selection][:, -1]))) + print(f'Dataset {set} new STD: {np.std(data[selection][:, -1])}') if sample != '': sample = np.load(sample) data = data[sample] - print("Using sample of size {}".format(len(sample))) + print(f"Using sample of size {len(sample)}") np.random.shuffle(data) - if hyperopt: - estimator = grid_cv - else: - estimator = new_model - + estimator = grid_cv if hyperopt else new_model real_targets = np.array(data[:, -1]).real.flatten() estimator.fit(data) diff --git a/neuralxc/drivers/other.py b/neuralxc/drivers/other.py index 6f7aca6..fb4295e 100644 --- a/neuralxc/drivers/other.py +++ b/neuralxc/drivers/other.py @@ -32,7 +32,7 @@ def plot_basis(basis): basis_instructions=basis_instructions['preprocessor']) for spec in projector.basis: - if not len(spec) == 1: continue + if len(spec) != 1: 
continue basis = projector.basis[spec] if isinstance(basis, list): r = torch.from_numpy(np.linspace(0, np.max([np.max(b_) for b in basis for b_ in b['r_o']]), 500)) @@ -45,9 +45,9 @@ def plot_basis(basis): rad = [rad] for ir, rl in enumerate(rad): if ir == 0: - plt.plot(r, rl, label='l = {}'.format(l), color='C{}'.format(l)) + plt.plot(r, rl, label=f'l = {l}', color=f'C{l}') else: - plt.plot(r, rl, color='C{}'.format(l)) + plt.plot(r, rl, color=f'C{l}') # plt.ylim(0,1) plt.legend() plt.show() @@ -79,9 +79,8 @@ def get_real_basis(atoms, basis, spec_agnostic=False): bp = BasisPadder(auxmol) padded_basis = bp.get_basis_json() for sym in padded_basis: - if sym in real_basis: - if real_basis[sym] != padded_basis[sym]: - raise Exception('Different basis sets across systems currently not supported') + if sym in real_basis and real_basis[sym] != padded_basis[sym]: + raise Exception('Different basis sets across systems currently not supported') real_basis[sym] = padded_basis[sym] @@ -106,7 +105,7 @@ def run_engine_driver(xyz, preprocessor, workdir='.tmp/'): nworkers=pre.get('n_workers', 1), kwargs=pre.get('engine', {})) # shutil.move(workdir + '/results.traj', './results.traj') - shutil.copy(workdir + '/results.traj', './results.traj') + shutil.copy(f'{workdir}/results.traj', './results.traj') if workdir == '.tmp/': shutil.rmtree(workdir) @@ -114,11 +113,7 @@ def run_engine_driver(xyz, preprocessor, workdir='.tmp/'): def fetch_default_driver(kind, hint='', out=''): from collections import abc - if hint: - hint_cont = json.load(open(hint, 'r')) - else: - hint_cont = {} - + hint_cont = json.load(open(hint, 'r')) if hint else {} def nested_dict_iter(nested): for key, value in nested.items(): if isinstance(value, abc.Mapping): @@ -139,7 +134,10 @@ def find_value_in_nested(nested, truekey): app = value df_cont = json.load(open(os.path.dirname(__file__) + '/../data/pre_{}.json'.format(app), 'r')) else: - df_cont = json.load(open(os.path.dirname(__file__) + '/../data/hyper.json', 'r')) + df_cont = json.load( + open(f'{os.path.dirname(__file__)}/../data/hyper.json', 'r') + ) + if hint: for key1 in df_cont: @@ -154,7 +152,7 @@ def find_value_in_nested(nested, truekey): df_cont[key1] = found if out == '': - out = kind + '.json' + out = f'{kind}.json' open(out, 'w').write(json.dumps(df_cont, indent=4)) @@ -205,7 +203,7 @@ def pre_driver(xyz, srcdir, preprocessor, dest='.tmp/'): pre.update({'preprocessor': basis_instr}) open(preprocessor_path, 'w').write(json.dumps(pre.__dict__)) - filename = os.path.join(workdir, basis_to_hash(basis_instr) + '.npy') + filename = os.path.join(workdir, f'{basis_to_hash(basis_instr)}.npy') data = preprocessor.fit_transform(None) np.save(filename, data) if 'hdf5' in dest: diff --git a/neuralxc/engines/cp2k.py b/neuralxc/engines/cp2k.py index 569fcb5..39e863b 100644 --- a/neuralxc/engines/cp2k.py +++ b/neuralxc/engines/cp2k.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs): self.nxc = self.nxc["path"] self.input_path = kwargs.pop("input_path", "") inp = '' - if not 'command' in kwargs: + if 'command' not in kwargs: kwargs['command'] = 'env OMP_NUM_THREADS=1 cp2k_shell.sdbg' if self.input_path: with open(self.input_path, 'r') as inp_file: @@ -34,11 +34,11 @@ def _generate_input(self, *args, **kwargs): nxc = self.nxc nxc_addto = self.nxc_addto - pattern = re.compile("LIBXC.*?{}.*?END LIBXC".format(nxc_addto), re.MULTILINE | re.S) + pattern = re.compile(f"LIBXC.*?{nxc_addto}.*?END LIBXC", re.MULTILINE | re.S) pattern0 = pattern.findall(input)[0] - pattern1 = 
pattern0.replace('{}\n'.format(nxc_addto), '{}\n \t\tNXC {}\n'.format(nxc_addto, nxc)) + pattern1 = pattern0.replace(f'{nxc_addto}\n', f'{nxc_addto}\n \t\tNXC {nxc}\n') input = input.replace(pattern0, pattern1) return input diff --git a/neuralxc/engines/engine.py b/neuralxc/engines/engine.py index bf7de35..bb3b890 100644 --- a/neuralxc/engines/engine.py +++ b/neuralxc/engines/engine.py @@ -40,8 +40,8 @@ def Engine(app, **kwargs): if app == 'pyscf_rad': app = 'pyscf' registry = BaseEngine.get_registry() - if not app in registry: - raise Exception('Engine: {} not registered'.format(app)) + if app not in registry: + raise Exception(f'Engine: {app} not registered') return registry[app](**kwargs) @@ -111,7 +111,7 @@ def __init__(self, **kwargs): # Environment variables for ase os.environ['SIESTA_PP_PATH'] = kwargs.pop('pseudoloc', '.') - if not 'SIESTA_COMMAND' in os.environ: - os.environ['SIESTA_COMMAND'] = exec_prepend + ' siesta < ./%s > ./%s' + if 'SIESTA_COMMAND' not in os.environ: + os.environ['SIESTA_COMMAND'] = f'{exec_prepend} siesta < ./%s > ./%s' self.calc = CustomSiesta(fdf_path, **kwargs) diff --git a/neuralxc/engines/siesta.py b/neuralxc/engines/siesta.py index f614f45..0b95533 100644 --- a/neuralxc/engines/siesta.py +++ b/neuralxc/engines/siesta.py @@ -28,23 +28,24 @@ def __init__(self, fdf_path=None, **kwargs): def write_input(self, atoms, properties=None, system_changes=None): super().write_input(atoms, properties, system_changes) - filename = os.path.join(self.directory, self.label + '.fdf') + filename = os.path.join(self.directory, f'{self.label}.fdf') # add custom fdf if self.fdf_path: with open(self.fdf_path, 'r') as custom_fdf: all_custom_keys = [list(entry.keys())[0]\ - for _, entry in next_fdf_entry(custom_fdf)] + for _, entry in next_fdf_entry(custom_fdf)] - filename_tmp = os.path.join(self.directory, self.label + '.tmp') + filename_tmp = os.path.join(self.directory, f'{self.label}.tmp') with open(filename_tmp, 'w') as tmp_file: with open(self.fdf_path, 'r') as custom_fdf: tmp_file.write(custom_fdf.read()) with open(filename, 'r') as ase_fdf: for is_block, entry in next_fdf_entry(ase_fdf): - if not list(entry.keys())[0] in all_custom_keys: - if 'pao' in list(entry.keys())[0] \ - and any(['pao' in key for key in all_custom_keys]): + if list(entry.keys())[0] not in all_custom_keys: + if 'pao' in list(entry.keys())[0] and any( + 'pao' in key for key in all_custom_keys + ): continue if is_block: tmp_file.write('%block ') @@ -53,17 +54,15 @@ def write_input(self, atoms, properties=None, system_changes=None): tmp_file.write(list(entry.values())[0]) tmp_file.write('%endblock ') tmp_file.write(list(entry.keys())[0]) - tmp_file.write('\n') else: tmp_file.write(' '.join(list(entry.items())[0])) - tmp_file.write('\n') - + tmp_file.write('\n') with open(filename_tmp, 'r') as tmp_file: with open(filename, 'w') as ase_fdf: ase_fdf.write(tmp_file.read()) if self.nxc: with open(filename, 'a') as ase_fdf: - ase_fdf.write('NeuralXC {} \n'.format(self.nxc)) + ase_fdf.write(f'NeuralXC {self.nxc} \n') def _write_species(self, f, atoms): """Write input related the different species. 
@@ -74,7 +73,7 @@ def _write_species(self, f, atoms): """ species, species_numbers = self.species(atoms) - if not self['pseudo_path'] is None: + if self['pseudo_path'] is not None: pseudo_path = self['pseudo_path'] elif 'SIESTA_PP_PATH' in os.environ: pseudo_path = os.environ['SIESTA_PP_PATH'] @@ -96,7 +95,7 @@ def _write_species(self, f, atoms): if spec['pseudopotential'] is None: label = symbol - pseudopotential = label + '.psf' + pseudopotential = f'{label}.psf' else: pseudopotential = spec['pseudopotential'] @@ -122,14 +121,14 @@ def _write_species(self, f, atoms): symlink_pseudos = self['symlink_pseudos'] if symlink_pseudos is None: - symlink_pseudos = not os.name == 'nt' + symlink_pseudos = os.name != 'nt' if symlink_pseudos: os.symlink(pseudopotential, pseudo_targetpath) else: shutil.copy(pseudopotential, pseudo_targetpath) - if not spec['excess_charge'] is None: + if spec['excess_charge'] is not None: atomic_number += 200 n_atoms = sum(np.array(species_numbers) == species_number) @@ -142,14 +141,14 @@ def _write_species(self, f, atoms): os.system(cmd) pseudo_head += '-Fraction-%.5f' % fraction - synth_pseudo = pseudo_head + '.psf' - synth_block_filename = pseudo_head + '.synth' + synth_pseudo = f'{pseudo_head}.psf' + synth_block_filename = f'{pseudo_head}.synth' os.remove(name) shutil.copyfile(synth_pseudo, name) synth_block = read_vca_synth_block(synth_block_filename, species_number=species_number) synth_blocks.append(synth_block) - if len(synth_blocks) > 0: + if synth_blocks: f.write(format_fdf('SyntheticAtoms', list(synth_blocks))) label = '.'.join(np.array(name.split('.'))[:-1]) @@ -158,7 +157,7 @@ def _write_species(self, f, atoms): if isinstance(spec['basis_set'], PAOBasisBlock): pao_basis.append(spec['basis_set'].script(label)) else: - basis_sizes.append((" " + label, spec['basis_set'])) + basis_sizes.append((f" {label}", spec['basis_set'])) f.write((format_fdf('ChemicalSpecieslabel', chemical_labels))) f.write('\n') f.write((format_fdf('PAO.Basis', pao_basis))) @@ -208,7 +207,7 @@ def getpath(self, fname=None, ext=None): if fname is None: fname = self.prefix if ext is not None: - fname = '{}.{}'.format(fname, ext) + fname = f'{fname}.{ext}' return os.path.join(self.directory, fname) @@ -217,8 +216,7 @@ def next_fdf_entry(file): inside_block = False block_content = '' block_name = '' - line = file.readline() - while (line): + while line := file.readline(): if len(line.strip()) > 0: if line.strip()[0] == '%': if not inside_block: @@ -234,5 +232,3 @@ def next_fdf_entry(file): yield False, {line.split()[0].lower(): ' '.join(line.split()[1:])} else: block_content += line - - line = file.readline() diff --git a/neuralxc/formatter.py b/neuralxc/formatter.py index 0bebbd0..b96b01d 100644 --- a/neuralxc/formatter.py +++ b/neuralxc/formatter.py @@ -2,7 +2,7 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin -from collections import Mapping +from collections.abc import Mapping class Formatter(TransformerMixin, BaseEstimator): @@ -17,9 +17,7 @@ def fit(self, C): """ Providing C in dictionary format, build set of rules for transformation """ - self._rule = {} - for idx, key, data in expand(C): - self._rule[key] = [s for s in data[0]] + self._rule = {key: list(data[0]) for idx, key, data in expand(C)} def transform(self, C): """ Transforms from a dictionary format ({n,l,m} : value) @@ -30,15 +28,12 @@ def transform(self, C): for idx, key, data in expand(C): data = data[0] - if not key in transformed[idx]: transformed[idx][key] = [] + if key not in 
transformed[idx]: transformed[idx][key] = [] for d in data: transformed[idx][key].append(np.array([d[s] for s in d])) transformed[idx][key] = np.array(transformed[idx][key]) - if not isinstance(C, list): - return transformed[0] - else: - return transformed + return transformed if isinstance(C, list) else transformed[0] def inverse_transform(self, C): """ Transforms from an ordered np.ndarray format to a dictionary @@ -48,23 +43,23 @@ def inverse_transform(self, C): for idx, key, data in expand(C): data = data[0] - if not key in transformed[idx]: transformed[idx][key] = [] + if key not in transformed[idx]: transformed[idx][key] = [] if not isinstance(self._rule, dict): basis = self._basis[key] rule = [ - '{},{},{}'.format(n, l, m) for n in range(0, basis['n']) for l in range(0, basis['l']) + f'{n},{l},{m}' + for n in range(basis['n']) + for l in range(basis['l']) for m in range(-l, l + 1) ] + else: rule = self._rule[key] for d in data: transformed[idx][key].append(dict(zip(rule, d.tolist()))) - if not isinstance(C, list): - return transformed[0] - else: - return transformed + return transformed if isinstance(C, list) else transformed[0] def fix_species(species, spec_agnostic=False): @@ -78,11 +73,10 @@ def fix_species(species, spec_agnostic=False): if spec_agnostic: if spec.upper() == spec: fixed[-1].append('X') + elif spec.upper() == spec: + fixed[-1].append(spec) else: - if spec.upper() == spec: - fixed[-1].append(spec) - else: - fixed[-1][-1] = fixed[-1][-1] + spec + fixed[-1][-1] = fixed[-1][-1] + spec return fixed @@ -112,7 +106,7 @@ def __init__(self, attrs, sys_species, spec_agnostic=False): if len(key) < 4 and key != 'X': self._attrs.pop(key) if not isinstance(sys_species, list): - raise ValueError('sys_species must be a list but is {}'.format(sys_species)) + raise ValueError(f'sys_species must be a list but is {sys_species}') self._sys_species = sys_species self._spec_agnostic = spec_agnostic @@ -148,10 +142,11 @@ def transform(self, X, y=None): features = [] targets = [] - if not n_sys == len(sys_species): + if n_sys != len(sys_species): raise ValueError( - 'Number of systems in X and len(sys_species) incompatible: n_sys: {}, len(sys_species): {}'.format( - n_sys, len(sys_species))) + f'Number of systems in X and len(sys_species) incompatible: n_sys: {n_sys}, len(sys_species): {len(sys_species)}' + ) + for this_sys, _ in enumerate(sys_species): this_species = sys_species[this_sys] @@ -165,7 +160,10 @@ def transform(self, X, y=None): if spec not in feat_dict: feat_dict[spec] = [] - vec_len = self._attrs[spec]['n'] * sum([2 * l + 1 for l in range(self._attrs[spec]['l'])]) + vec_len = self._attrs[spec]['n'] * sum( + 2 * l + 1 for l in range(self._attrs[spec]['l']) + ) + x_atm = X_sys[:, idx:idx + vec_len] # print(x_atm.shape) feat_dict[spec].append(x_atm) @@ -196,7 +194,7 @@ def inverse_transform(self, features, targets): sys_species = fix_species(self._sys_species, self._spec_agnostic) total_length = np.sum([len(tar) for tar in targets]) max_vec_len = np.max([np.sum([feat[spec].shape[1]*feat[spec].shape[2] for spec in feat])\ - for feat in features]) + for feat in features]) X = np.zeros([total_length, max_vec_len + 1]) y = np.zeros(total_length) @@ -209,7 +207,7 @@ def inverse_transform(self, features, targets): this_species = sys_species[sysidx] this_len = len(tar) X[sys_loc:sys_loc + this_len, 0] = sysidx - unique_species = np.unique([char for char in this_species]) + unique_species = np.unique(list(this_species)) spec_loc = {spec: 0 for spec in unique_species} idx = 1 diff --git 
a/neuralxc/ml/network.py b/neuralxc/ml/network.py index 94dc4fd..ecd54e3 100644 --- a/neuralxc/ml/network.py +++ b/neuralxc/ml/network.py @@ -51,7 +51,7 @@ def get_params(self, *args, **kwargs): def build_network(self): self._network = EnergyNetwork(n_layers=self.n_layers, n_nodes=self.n_nodes, activation=self.activation) - if not self.path is None: + if self.path is not None: self._network.restore_model(self.path) def fit(self, X, y=None, *args, **kwargs): @@ -86,8 +86,7 @@ def predict(self, X, *args, **kwargs): if not isinstance(X, list): X = [X] - predictions = self._network.predict(X[0]) - return predictions + return self._network.predict(X[0]) def score(self, X, y=None, metric='mae'): @@ -102,14 +101,11 @@ def score(self, X, y=None, metric='mae'): else: raise Exception('Metric unknown or not implemented') - scores = [] if not isinstance(X, list): X = [X] y = [y] - for X_, y_ in zip(X, y): - scores.append(metric_function(self.predict(X_) - y_)) - + scores = [metric_function(self.predict(X_) - y_) for X_, y_ in zip(X, y)] return -np.mean(scores) def load_network(self, path): @@ -120,7 +116,7 @@ def train_net(net, dataloader, dataloader_val=None, max_steps=10000, n_checkpoin # net.train() check_point_every = max_steps//n_checkpoints - + loss_fn = torch.nn.MSELoss() optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay) @@ -134,7 +130,6 @@ def train_net(net, dataloader, dataloader_val=None, max_steps=10000, n_checkpoin max_epochs = max_steps for epoch in range(max_epochs): - logs = {} epoch_loss = 0 for data in dataloader: rho, energy = data @@ -145,9 +140,9 @@ def train_net(net, dataloader, dataloader_val=None, max_steps=10000, n_checkpoin optimizer.step() epoch_loss += loss.item() - logs['log loss'] = np.sqrt(epoch_loss / len(dataloader)) + logs = {'log loss': np.sqrt(epoch_loss / len(dataloader))} for i, param_group in enumerate(optimizer.param_groups): - logs['lr_{}'.format(i)] = float(param_group['lr']) + logs[f'lr_{i}'] = float(param_group['lr']) if logs['lr_0'] <= MIN_RATE: return net @@ -164,8 +159,13 @@ def train_net(net, dataloader, dataloader_val=None, max_steps=10000, n_checkpoin logs['val loss'] = np.sqrt(val_loss / len(dataloader_val)) else: logs['val loss'] = 0 - print('Epoch {} ||'.format(epoch), ' Training loss : {:.6f}'.format(logs['log loss']), - ' Validation loss : {:.6f}'.format(logs['val loss']), ' Learning rate: {}'.format(logs['lr_0'])) + print( + f'Epoch {epoch} ||', + ' Training loss : {:.6f}'.format(logs['log loss']), + ' Validation loss : {:.6f}'.format(logs['val loss']), + f" Learning rate: {logs['lr_0']}", + ) + return net @@ -176,10 +176,7 @@ def __init__(self, rho, energies): def __getitem__(self, index): - rho = {} - for species in self.rho: - rho[species] = self.rho[species][index] - + rho = {species: self.rho[species][index] for species in self.rho} energy = self.energies[index] return (rho, energy) @@ -202,16 +199,23 @@ def __init__(self, n_nodes, n_layers, activation): def train(self, X, y, step_size=0.01, max_steps=50001, b_=0, verbose=True, train_valid_split=0.8, batch_size=0): if not hasattr(self, 'species_nets'): - species_nets = {} - for spec in X: - if self.n_layers < 1: - species_nets[spec] = torch.nn.Linear(X[spec].shape[-1], 1) - else: - species_nets[spec] = torch.nn.Sequential( - *([torch.nn.Linear(X[spec].shape[-1], self.n_nodes)] +\ - (self.n_layers-1)* [self.activation,torch.nn.Linear(self.n_nodes, self.n_nodes)] +\ - [self.activation, torch.nn.Linear(self.n_nodes,1)]) + species_nets = { + spec: 
torch.nn.Linear(X[spec].shape[-1], 1) + if self.n_layers < 1 + else torch.nn.Sequential( + *( + [torch.nn.Linear(X[spec].shape[-1], self.n_nodes)] + + (self.n_layers - 1) + * [ + self.activation, + torch.nn.Linear(self.n_nodes, self.n_nodes), + ] + + [self.activation, torch.nn.Linear(self.n_nodes, 1)] ) + ) + for spec in X + } + self.species_nets = torch.nn.ModuleDict(species_nets) print(self.species_nets) if train_valid_split < 1.0: @@ -248,11 +252,10 @@ def predict(self, X): return [result.detach().numpy()] def forward(self, input): - output = 0 - for spec in input: - output += torch.sum(self.species_nets[spec](input[spec]), dim=-2) - - return output + return sum( + torch.sum(self.species_nets[spec](input[spec]), dim=-2) + for spec in input + ) Energy_Network = EnergyNetwork # Needed to unpickle old models diff --git a/neuralxc/ml/pipeline.py b/neuralxc/ml/pipeline.py index c19b221..9a6fd5d 100644 --- a/neuralxc/ml/pipeline.py +++ b/neuralxc/ml/pipeline.py @@ -88,12 +88,8 @@ def save(self, path, override=False, npmodel=False): if os.path.isdir(path): if not override: raise Exception('Model already exists, set override = True') - else: - shutil.rmtree(path) - os.mkdir(path) - else: - os.mkdir(path) - + shutil.rmtree(path) + os.mkdir(path) pickle.dump([self.steps, self.basis_instructions, self.symmetrize_instructions], open(os.path.join(path, 'pipeline.pckl'), 'wb')) @@ -124,7 +120,7 @@ def __init__(self, species, model): self.model = torch.nn.Sequential(*steps) def forward(self, *args): - C = {spec: c for spec, c in zip(self.species, args)} + C = dict(zip(self.species, args)) return self.model(C) @@ -160,15 +156,16 @@ def serialize_energy(model, C, outpath, override): try: os.mkdir(outpath) except FileExistsError: - if override: - shutil.rmtree(outpath) - os.mkdir(outpath) - else: + if not override: raise Exception('Model exists, set override = True to save at this location') + shutil.rmtree(outpath) + os.mkdir(outpath) for spec in C: - torch.jit.save(e_models[spec], outpath + '/xc_' + spec) - open(outpath + '/bas.json', 'w').write(json.dumps(dict(model.basis_instructions))) + torch.jit.save(e_models[spec], f'{outpath}/xc_{spec}') + open(f'{outpath}/bas.json', 'w').write( + json.dumps(dict(model.basis_instructions)) + ) def serialize_projector(projector): @@ -182,11 +179,7 @@ def serialize_projector(projector): else: rho_c = np.ones(shape=grid_c) - species = [] - for spec in projector.basis: - if len(spec) < 3: - species.append(spec) - + species = [spec for spec in projector.basis if len(spec) < 3] unitcell_c = torch.from_numpy(unitcell_c).double() grid_c = torch.from_numpy(grid_c).double() pos_c = torch.from_numpy(pos_c).double() @@ -216,11 +209,7 @@ def serialize_pipeline(model, outpath, override=False): my_box = np.array([[0, 9]] * 3) pos_c = np.array([[0, 0, 0]]) basis_instructions = model.basis_instructions - species = [] - for spec in basis_instructions: - if len(spec) < 3: - species.append(spec) - + species = [spec for spec in basis_instructions if len(spec) < 3] try: model.symmetrize_instructions.update({'basis': model.basis_instructions}) except AttributeError: @@ -236,10 +225,10 @@ def serialize_pipeline(model, outpath, override=False): grid_weights=grid_c) try: - model.symmetrize_instructions.update(projector.symmetrize_instructions) + model.symmetrize_instructions |= projector.symmetrize_instructions except AttributeError: - model.symmetrize_instructions.update({'basis': model.basis_instructions}) - + model.symmetrize_instructions['basis'] = model.basis_instructions + 
model.symmetrizer = Symmetrizer(model.symmetrize_instructions) except TypeError: C = {} @@ -267,14 +256,15 @@ def serialize_pipeline(model, outpath, override=False): try: os.mkdir(outpath) except FileExistsError: - if override: - shutil.rmtree(outpath) - os.mkdir(outpath) - else: + if not override: raise Exception('Model exists, set override = True to save at this location') + shutil.rmtree(outpath) + os.mkdir(outpath) for spec in species: - torch.jit.save(basis_models[spec], outpath + '/basis_' + spec) - torch.jit.save(projector_models[spec], outpath + '/projector_' + spec) - torch.jit.save(e_models[spec], outpath + '/xc_' + spec) - open(outpath + '/bas.json', 'w').write(json.dumps(dict(model.basis_instructions))) + torch.jit.save(basis_models[spec], f'{outpath}/basis_{spec}') + torch.jit.save(projector_models[spec], f'{outpath}/projector_{spec}') + torch.jit.save(e_models[spec], f'{outpath}/xc_{spec}') + open(f'{outpath}/bas.json', 'w').write( + json.dumps(dict(model.basis_instructions)) + ) diff --git a/neuralxc/ml/transformer.py b/neuralxc/ml/transformer.py index ab00398..4c1fd2a 100644 --- a/neuralxc/ml/transformer.py +++ b/neuralxc/ml/transformer.py @@ -24,10 +24,7 @@ def wrapped_func(X, *args, **kwargs): X = torch.from_numpy(X) made_tensor = True Y = func(X, *args, **kwargs) - if made_tensor: - return Y.detach().numpy() - else: - return Y + return Y.detach().numpy() if made_tensor else Y return wrapped_func @@ -69,53 +66,46 @@ def transform(self, X, y=None, **fit_params): results = [] for x in X: if isinstance(x, dict): - results_dict = {} - for spec in x: - results_dict[spec] = self._spec_dict[spec].transform(x[spec]) + results_dict = {spec: self._spec_dict[spec].transform(x[spec]) for spec in x} results.append(results_dict) else: results.append(system_shape(self.torch_transform(atomic_shape(x)), x.shape[-2])) if made_list: results = results[0] - if was_tuple: - return results, y - else: - return results + return (results, y) if was_tuple else results def fit(self, X, y=None): if self.is_fit: return self + self.is_fit = True + if isinstance(X, tuple): + X = X[0] + + if isinstance(X, list): + super_X = {} + for x in X: + for spec in x: + if spec not in super_X: + super_X[spec] = [] + super_X[spec].append(atomic_shape(x[spec])) + for spec in super_X: + super_X[spec] = np.concatenate(super_X[spec]) else: - self.is_fit = True - if isinstance(X, tuple): - X = X[0] - - if isinstance(X, list): - super_X = {} - for x in X: - for spec in x: - if not spec in super_X: - super_X[spec] = [] - super_X[spec].append(atomic_shape(x[spec])) - for spec in super_X: - super_X[spec] = np.concatenate(super_X[spec]) - else: - super_X = X - - if isinstance(super_X, dict): - self._spec_dict = {} - for spec in super_X: - self._spec_dict[spec] =\ - type(self)(*self._initargs, - **self.get_kwargs()) - self._spec_dict[spec].__dict__.update(self.get_params()) - # Due to padding some rows might be zero, exclude those during fit: - mask = ~np.all(atomic_shape(super_X[spec]) == 0, axis=-1) - self._spec_dict[spec].fit(self._before_fit(atomic_shape(super_X[spec])[mask])) - return self - else: - return super().fit(atomic_shape(super_X)) + super_X = X + + if not isinstance(super_X, dict): + return super().fit(atomic_shape(super_X)) + self._spec_dict = {} + for spec in super_X: + self._spec_dict[spec] =\ + type(self)(*self._initargs, + **self.get_kwargs()) + self._spec_dict[spec].__dict__.update(self.get_params()) + # Due to padding some rows might be zero, exclude those during fit: + mask = 
~np.all(atomic_shape(super_X[spec]) == 0, axis=-1) + self._spec_dict[spec].fit(self._before_fit(atomic_shape(super_X[spec])[mask])) + return self def fit_transform(self, X, y=None, **fit_params): return self.fit(X).transform(X) @@ -136,11 +126,11 @@ def __init__(self, threshold=0.0): TorchModule.__init__(self) self._before_fit = identity # lambdas can't be pickled self._initargs = [] - self.treshold = threshold + self.threshold = threshold super().__init__(**self.get_kwargs()) def get_kwargs(self): - return dict(threshold=self.treshold) + return dict(threshold=self.threshold) def transform(self, X, y=None, **fit_params): return GroupedTransformer.transform(self, X, y, **fit_params) @@ -150,7 +140,7 @@ def fit_transform(self, X, y=None, **fit_params): def torch_transform(self, X): X_shape = X.size() - if not len(X_shape) == 2: + if len(X_shape) != 2: X = X.view(-1, X_shape[-1]) support = torch.from_numpy(self.get_support()).bool() return X[:, support] @@ -164,6 +154,7 @@ def __init__(self, threshold=0.0): TorchModule.__init__(self) self._before_fit = identity # lambdas can't be pickled self._initargs = [] + self.threshold = threshold self._initkwargs = {} super().__init__() @@ -172,7 +163,7 @@ def get_kwargs(self): def torch_transform(self, X): X_shape = X.size() - if not len(X_shape) == 2: + if len(X_shape) != 2: X = X.view(-1, X_shape[-1]) X = (X - torch.from_numpy(self.mean_)) / torch.sqrt(torch.from_numpy(self.var_)) return X diff --git a/neuralxc/ml/utils.py b/neuralxc/ml/utils.py index c3de848..ecb6a95 100644 --- a/neuralxc/ml/utils.py +++ b/neuralxc/ml/utils.py @@ -30,11 +30,11 @@ def E_from_atoms(traj): energies = {} for atoms in traj: spec = ''.join(atoms.get_chemical_symbols()) - if not spec in energies: + if spec not in energies: energies[spec] = [] energies[spec].append(atoms.get_potential_energy()) - allspecies = np.unique([s for s in ''.join([key for key in energies])]) + allspecies = np.unique(list(''.join(list(energies)))) X = np.zeros([len(energies), len(allspecies)]) y = np.zeros(len(energies)) @@ -45,7 +45,7 @@ def E_from_atoms(traj): lr = LinearRegression(fit_intercept=False) lr.fit(X, y) offsets = lr.predict(X) - offsets = {key: offset for key, offset in zip(energies, offsets)} + offsets = dict(zip(energies, offsets)) energies = [] for atoms in traj: @@ -136,8 +136,7 @@ def load_sets(datafile, baseline, reference, basis_key='', percentile_cutoff=0): X = np.concatenate(Xs, axis=0) y = np.concatenate(ys, axis=0) - data = np.concatenate([X, y.reshape(-1, 1)], axis=1) - return data + return np.concatenate([X, y.reshape(-1, 1)], axis=1) def load_data(datafile, baseline, reference, basis_key, percentile_cutoff=0.0, E0=None): @@ -168,9 +167,9 @@ def load_data(datafile, baseline, reference, basis_key, percentile_cutoff=0.0, E If None tries to find this value as an attribute inside datafile """ no_energy = False - if baseline + '/energy' in datafile: - data_base = datafile[baseline + '/energy'] - data_ref = datafile[reference + '/energy'] + if f'{baseline}/energy' in datafile: + data_base = datafile[f'{baseline}/energy'] + data_ref = datafile[f'{reference}/energy'] else: data_base = np.array([0]) data_ref = np.array([0]) @@ -196,7 +195,7 @@ def load_data(datafile, baseline, reference, basis_key, percentile_cutoff=0.0, E if basis_key == '': data_base = np.zeros([len(tar), 0]) else: - data_base = datafile[baseline + '/density/' + basis_key][:, :] + data_base = datafile[f'{baseline}/density/{basis_key}'][:, :] if no_energy: tar = np.zeros(len(data_base)) @@ -218,12 +217,12 @@ def 
match_hyperparameter(hp, parameters): corresponding full name in parameters """ - matches = [] - for par in parameters: - if hp == par: - matches.append(par) + matches = [par for par in parameters if hp == par] if len(matches) != 1: - raise ValueError('{} matches found for hyperparameter {}. Must be exactly 1'.format(len(matches), hp)) + raise ValueError( + f'{len(matches)} matches found for hyperparameter {hp}. Must be exactly 1' + ) + return matches[0] @@ -233,7 +232,7 @@ def to_full_hyperparameters(hp, parameters): """ full = {} for name in hp: - new_key = 'ml__' + match_hyperparameter(name, parameters) + new_key = f'ml__{match_hyperparameter(name, parameters)}' full[new_key] = hp[name] if not isinstance(full[new_key], list): full[new_key] = [full[new_key]] @@ -253,9 +252,13 @@ def get_default_pipeline(basis, species, symmetrizer_type='trace', pca_threshold estimator = NetworkWrapper(4, 1, 0, alpha=0.001, max_steps=4001, test_size=0.0, valid_size=0, random_seed=None) - pipeline_list = [('spec_group', spec_group), ('symmetrizer', symmetrizer), ('var_selector', var_selector)] + pipeline_list = [ + ('spec_group', spec_group), + ('symmetrizer', symmetrizer), + ('var_selector', var_selector), + ('scaler', GroupedStandardScaler()), + ] - pipeline_list.append(('scaler', GroupedStandardScaler())) pipeline_list.append(('estimator', estimator)) @@ -278,15 +281,16 @@ def get_grid_cv(hdf5, preprocessor, inputfile, spec_agnostic=False): if not isinstance(hdf5[1], list): hdf5[1] = [hdf5[1]] - all_species = [] - for set in hdf5[1]: - all_species.append(''.join(find_attr_in_tree(datafile, set, 'species'))) + all_species = [ + ''.join(find_attr_in_tree(datafile, set, 'species')) + for set in hdf5[1] + ] if pre: basis = pre['preprocessor'] else: basis = {spec: {'n': 1, 'l': 1, 'r_o': 1} for spec in ''.join(all_species)} - basis.update({'extension': 'RHOXC'}) + basis['extension'] = 'RHOXC' pipeline = get_default_pipeline(basis, all_species, @@ -305,8 +309,15 @@ def get_grid_cv(hdf5, preprocessor, inputfile, spec_agnostic=False): verbose = inp.get('verbose', 1) pipe = Pipeline([('ml', pipeline)]) - grid_cv = GridSearchCV(pipe, hyper, cv=cv, n_jobs=n_jobs, refit=True, verbose=verbose, return_train_score=True) - return grid_cv + return GridSearchCV( + pipe, + hyper, + cv=cv, + n_jobs=n_jobs, + refit=True, + verbose=verbose, + return_train_score=True, + ) def get_basis_grid(preprocessor): @@ -331,10 +342,7 @@ def nested_dict_build(nested, i): if isinstance(value, abc.Mapping): select_dict[key] = nested_dict_build(value, i) else: - if isinstance(value, list): - select_dict[key] = value[i] - else: - select_dict[key] = value + select_dict[key] = value[i] if isinstance(value, list) else value return select_dict max_len = 0 @@ -359,13 +367,14 @@ def get_preprocessor(pre, atoms, src_path): species = ''.join(atoms[0].get_chemical_symbols()) for a in atoms: species2 = ''.join(a.get_chemical_symbols()) - if not species2 == species: + if species2 != species: print('Warning (in get_preprocessor): Dataset not homogeneous') basis = {spec: {'n': 1, 'l': 1, 'r_o': 1} for spec in species} - basis.update(pre['preprocessor']) - preprocessor = Preprocessor(basis, src_path, atoms, num_workers=pre.get('n_workers', 1)) - return preprocessor + basis |= pre['preprocessor'] + return Preprocessor( + basis, src_path, atoms, num_workers=pre.get('n_workers', 1) + ) class SampleSelector(BaseEstimator): @@ -415,7 +424,7 @@ def sample_clusters(data, for center, label in zip(centers, np.unique(labels)): # Loop over clusters _, idx = 
nbrs.kneighbors(center.reshape(1, -1)) choice = idx[0] - if not choice in picked: + if choice not in picked: sampled.append(indices[choice]) return sampled diff --git a/neuralxc/neuralxc.py b/neuralxc/neuralxc.py index 1ee90c4..c82668a 100644 --- a/neuralxc/neuralxc.py +++ b/neuralxc/neuralxc.py @@ -16,9 +16,9 @@ class PySCFNXC(AtomicFunc): def __init__(self, path): - model_paths = glob(path + '/*') + model_paths = glob(f'{path}/*') for mp in model_paths: - if 'bas.json' == os.path.basename(mp): + if os.path.basename(mp) == 'bas.json': mp = json.loads(open(mp,'r').read()) self.basis = ConfigFile({'preprocessor' : mp, diff --git a/neuralxc/preprocessor/driver.py b/neuralxc/preprocessor/driver.py index 37661f3..17a3978 100644 --- a/neuralxc/preprocessor/driver.py +++ b/neuralxc/preprocessor/driver.py @@ -46,12 +46,12 @@ def mbe_driver(atoms, app, workdir, kwargs, nworkers): results = calculate_distributed(atoms, app, workdir, kwargs, nworkers) species = [a.get_chemical_symbols() for a in atoms] - n_mol = int(len(species[0]) / n_block) + n_mol = len(species[0]) // n_block for s in species: - n_mol_new = int(len(s) / n_block) - if not n_mol == n_mol_new: + n_mol_new = len(s) // n_block + if n_mol != n_mol_new: raise Exception('Every snapshot in trajectory must contain same number of molecules') - if not s == [s for s in building_block] * int(len(s) / n_block): + if s != list(building_block) * (len(s) // n_block): print(s) raise Exception('Trajectory file must contain atoms in the oder OHHOHH...') @@ -67,14 +67,19 @@ def mbe_driver(atoms, app, workdir, kwargs, nworkers): cell=a.get_cell()) for a in atoms for comb in itertools.combinations(range(n_mol), n) ] try: - os.mkdir(mbe_root + '/mbe_{}'.format(n)) + os.mkdir(mbe_root + f'/mbe_{n}') except FileExistsError: pass - lower_results.append(calculate_distributed(new_atoms, app, mbe_root + '/mbe_{}'.format(n), kwargs, nworkers)) + lower_results.append( + calculate_distributed( + new_atoms, app, mbe_root + f'/mbe_{n}', kwargs, nworkers + ) + ) + etot = np.array([a.get_potential_energy() for a in results]) for i, lr in enumerate(lower_results[::-1]): - write(mbe_root + '/mbe_{}/results.traj'.format(n_mol - (i + 1)), lr) + write(mbe_root + f'/mbe_{n_mol - (i + 1)}/results.traj', lr) epart = np.array([((-1)**(i + 1)) * a.get_potential_energy() for a in lr]).reshape(len(etot), -1) epart = np.sum(epart, axis=-1) etot += epart @@ -90,7 +95,7 @@ def calculate_distributed(atoms, app, workdir, kwargs, n_workers=-1): cwd = os.getcwd() if n_workers > 1: - print('Calculating {} systems on'.format(len(atoms))) + print(f'Calculating {len(atoms)} systems on') cluster = LocalCluster(n_workers=n_workers, threads_per_worker=1) print(cluster) client = Client(cluster) diff --git a/neuralxc/preprocessor/preprocessor.py b/neuralxc/preprocessor/preprocessor.py index 53fc17c..d4e6fda 100644 --- a/neuralxc/preprocessor/preprocessor.py +++ b/neuralxc/preprocessor/preprocessor.py @@ -31,7 +31,7 @@ def __init__(self, basis_instructions, src_path, atoms, target_path='', num_work self.num_workers = num_workers def fit(self, X=None, y=None, **kwargs): - self.client = kwargs.get('client', None) + self.client = kwargs.get('client') return self def transform(self, X=None, y=None): @@ -42,20 +42,21 @@ def transform(self, X=None, y=None): unique_systems = np.array([''.join(self.get_chemical_symbols(a)) for a in self.atoms]) unique_systems = np.unique(unique_systems, axis=0) - if spec_agn: - self.species_string = unique_systems[0][0] * max([len(s) for s in unique_systems]) - else: - 
self.species_string = ''.join([s for s in unique_systems]) - # === Padding === + self.species_string = ( + unique_systems[0][0] * max(len(s) for s in unique_systems) + if spec_agn + else ''.join(list(unique_systems)) + ) + + width = { + ''.join(self.get_chemical_symbols(atoms)): len(dat) + for dat, atoms in zip(self.data, self.atoms) + } - #Find padded width of data - width = {} - for dat, atoms in zip(self.data, self.atoms): - width[''.join(self.get_chemical_symbols(atoms))] = len(dat) #Sanity check assert len(unique_systems) == len(width) if spec_agn: - paddedwidth = max([width[key] for key in width]) + paddedwidth = max(width[key] for key in width) else: paddedwidth = sum([width[key] for key in width]) @@ -75,7 +76,7 @@ def transform(self, X=None, y=None): padded_data[lidx, paddedoffset[syskey]:paddedoffset[syskey] + len(dat)] = dat data = padded_data - if isinstance(X, list) or isinstance(X, np.ndarray): + if isinstance(X, (list, np.ndarray)): data = data[X] return data @@ -102,18 +103,22 @@ def map(self, *args): atoms = self.atoms extension = self.basis_instructions.get('extension', 'RHOXC') if extension[0] != '.': - extension = '.' + extension + extension = f'.{extension}' jobs = [] for i, system in enumerate(atoms): - filename = '' - for file in os.listdir(pjoin(self.src_path, str(i))): - if file.endswith(extension): - filename = file - break + filename = next( + ( + file + for file in os.listdir(pjoin(self.src_path, str(i))) + if file.endswith(extension) + ), + '', + ) + if filename == '': raise Exception('Density file not found in ' +\ - pjoin(self.src_path,str(i))) + pjoin(self.src_path,str(i))) jobs.append([ pjoin(self.src_path, str(i), filename), @@ -123,11 +128,11 @@ def map(self, *args): # results = np.array([j.compute(num_workers = self.num_workers) for j in jobs]) futures = client.map(transform_one, *[[j[i] for j in jobs] for i in range(3)], len(jobs) * [self.basis_instructions]) - if self.num_workers == 1: - results = list(futures) - else: - results = [f.result() for f in futures] - return results + return ( + list(futures) + if self.num_workers == 1 + else [f.result() for f in futures] + ) def score(self, *args, **kwargs): return 0 diff --git a/neuralxc/projector/gaussian.py b/neuralxc/projector/gaussian.py index 3e9e3fd..57a4d11 100644 --- a/neuralxc/projector/gaussian.py +++ b/neuralxc/projector/gaussian.py @@ -35,19 +35,18 @@ def parse_basis(basis_instructions): else: basis_strings[species] = basis_instructions['basis'] bas = basis_strings[species] + elif os.path.isfile(basis_instructions[species]['basis']): + basis_strings[species] = open(basis_instructions[species]['basis'], 'r').read() + bas = gtobasis.parse(basis_strings[species]) else: - if os.path.isfile(basis_instructions[species]['basis']): - basis_strings[species] = open(basis_instructions[species]['basis'], 'r').read() - bas = gtobasis.parse(basis_strings[species]) - else: - basis_strings[species] = basis_instructions[species]['basis'] - bas = basis_strings[species] + basis_strings[species] = basis_instructions[species]['basis'] + bas = basis_strings[species] spec = 'O' if species == 'X' else species try: - mol = gto.M(atom='{} 0 0 0'.format(spec), basis={spec: bas}) + mol = gto.M(atom=f'{spec} 0 0 0', basis={spec: bas}) except RuntimeError: - mol = gto.M(atom='{} 0 0 0'.format(spec), basis={spec: bas}, spin=1) + mol = gto.M(atom=f'{spec} 0 0 0', basis={spec: bas}, spin=1) if 'basis' in basis_instructions: sigma = basis_instructions['basis'].get('sigma', 2.0) gamma = basis_instructions['basis'].get('gamma', 
1.0) @@ -116,7 +115,7 @@ def get_basis_on_mesh(self, box, basis_instructions): rads = [] box['radial'] = torch.stack(box['radial']) - for ib, basis in enumerate(basis_instructions): + for basis in basis_instructions: l = basis['l'] filt = (box['radial'][0] <= 1000000) box_rad = box['radial'][:, filt] @@ -163,9 +162,9 @@ def init_padder(self, basis_instructions): bas = basis_strings[species] spec = 'O' if species == 'X' else species try: - mol = gto.M(atom='{} 0 0 0'.format(spec), basis={spec: bas}) + mol = gto.M(atom=f'{spec} 0 0 0', basis={spec: bas}) except RuntimeError: - mol = gto.M(atom='{} 0 0 0'.format(spec), basis={spec: bas}, spin=1) + mol = gto.M(atom=f'{spec} 0 0 0', basis={spec: bas}, spin=1) bp = BasisPadder(mol) il = bp.indexing_l[spec][0] ir = bp.indexing_r[spec][0] @@ -191,9 +190,11 @@ def radials(cls, r, basis, W=None): result = [] if isinstance(basis, list): for b in basis: - res = [] - for ib, alpha in enumerate(b['alpha']): - res.append(cls.g(r, b['r_o'][ib], b['alpha'][ib], b['l'], b['gamma'][ib])) + res = [ + cls.g(r, b['r_o'][ib], b['alpha'][ib], b['l'], b['gamma'][ib]) + for ib, alpha in enumerate(b['alpha']) + ] + result.append(res) elif isinstance(basis, dict): result.append([cls.g(r, basis['r_o'], basis['alpha'], basis['l'], basis['gamma'])]) @@ -222,8 +223,8 @@ def __init__(self, unitcell, grid, basis_instructions, **kwargs): Instructions that define basis """ full_basis, basis_strings = parse_basis(basis_instructions) - basis = {key: val for key, val in basis_instructions.items()} - basis.update(full_basis) + basis = dict(basis_instructions.items()) + basis |= full_basis self.basis_strings = basis_strings EuclideanProjector.__init__(self, unitcell, grid, basis, **kwargs) self.init_padder(basis_instructions) @@ -256,9 +257,7 @@ def forward_fast(self, rho, positions, unitcell, grid, radials, angulars, my_box """ self.set_cell_parameters(unitcell, grid) basis = self.basis[self.species] - box = {} - box['mesh'] = my_box[:3] - box['radial'] = my_box[3:] + box = {'mesh': my_box[:3], 'radial': my_box[3:]} Xm, Ym, Zm = box['mesh'].long() return self.project_onto(rho[..., Xm, Ym, Zm], radials, angulars, basis, self.basis_strings[self.species], box) @@ -288,8 +287,8 @@ def __init__(self, grid_coords, grid_weights, basis_instructions, **kwargs): self.grid_weights = torch.from_numpy(grid_weights) self.V_cell = self.grid_weights full_basis, basis_strings = parse_basis(basis_instructions) - basis = {key: val for key, val in basis_instructions.items()} - basis.update(full_basis) + basis = dict(basis_instructions.items()) + basis |= full_basis self.basis_strings = basis_strings self.basis = basis self.all_angs = {} @@ -299,9 +298,7 @@ def __init__(self, grid_coords, grid_weights, basis_instructions, **kwargs): def forward_fast(self, rho, positions, grid_coords, grid_weights, radials, angulars, my_box): basis = self.basis[self.species] - box = {} - box['mesh'] = my_box[0] - box['radial'] = my_box[1:] + box = {'mesh': my_box[0], 'radial': my_box[1:]} Xm = box['mesh'].long() grid_weights = grid_weights[Xm] self.set_cell_parameters(grid_coords, grid_weights) diff --git a/neuralxc/projector/polynomial.py b/neuralxc/projector/polynomial.py index 7ebad46..36e6897 100644 --- a/neuralxc/projector/polynomial.py +++ b/neuralxc/projector/polynomial.py @@ -80,10 +80,7 @@ def g(cls, r, basis, a): @staticmethod def orthogonalize(func, r, basis, W): r_o = basis['r_o'] - rad = [] - for k in torch.arange(0, W.size()[0]): - rad.append(func(r, basis, (k + 1).double())) - + rad = [func(r, basis, 
(k + 1).double()) for k in torch.arange(0, W.size()[0])] result = contract('ij,j...->i...', W, torch.stack(rad)) result[:, r > r_o] = 0 return result diff --git a/neuralxc/projector/projector.py b/neuralxc/projector/projector.py index c7b3a8b..caa5ff2 100644 --- a/neuralxc/projector/projector.py +++ b/neuralxc/projector/projector.py @@ -29,8 +29,8 @@ def DensityProjector(**kwargs): projector_type = 'pyscf' registry = BaseProjector.get_registry() - if not projector_type in registry: - raise Exception('Projector: {} not registered'.format(projector_type)) + if projector_type not in registry: + raise Exception(f'Projector: {projector_type} not registered') return registry[projector_type](**kwargs) @@ -106,7 +106,7 @@ def forward(self, rho, positions, species, unitcell, grid, my_box): # rho = rho.permute(1,0) for pos, spec in zip(positions, species): - if not spec in basis_rep: + if spec not in basis_rep: basis_rep[spec] = [] self.species = spec @@ -138,10 +138,7 @@ def angulars_real(l, theta, phi): float or np.ndarray Value of angular function at provided point(s) """ - res = [] - for m in range(-l, l + 1): - res.append(geom.SH(l, m, theta, phi)) - return res + return [geom.SH(l, m, theta, phi) for m in range(-l, l + 1)] class EuclideanProjector(BaseProjector): @@ -165,10 +162,11 @@ def __init__(self, unitcell, grid, basis_instructions, **kwargs): super().__init__() self.basis = basis_instructions # Initialize the matrix used to orthonormalize radial basis - W = {} - for species in basis_instructions: - if len(species) < 3: - W[species] = self.get_W(basis_instructions[species]) + W = { + species: self.get_W(basis_instructions[species]) + for species in basis_instructions + if len(species) < 3 + } a = np.linalg.norm(unitcell, axis=1) / grid[:3] self.unitcell = torch.from_numpy(unitcell) @@ -300,11 +298,7 @@ def mesh_3d(self, U, a, rmax, my_box, cm, scaled=False, indexing='xy', both=Fals Rm = torch.stack([Xm, Ym, Zm]).double() - if scaled: - R = contract('ij,jklm -> iklm', U, Rm) - return R - else: - return Rm + return contract('ij,jklm -> iklm', U, Rm) if scaled else Rm class RadialProjector(BaseProjector): @@ -330,10 +324,11 @@ def __init__(self, grid_coords, grid_weights, basis_instructions, **kwargs): BaseProjector.__init__(self) self.basis = basis_instructions # Initialize the matrix used to orthonormalize radial basis - W = {} - for species in basis_instructions: - if len(species) < 3: - W[species] = self.get_W(basis_instructions[species]) + W = { + species: self.get_W(basis_instructions[species]) + for species in basis_instructions + if len(species) < 3 + } self.grid_coords = torch.from_numpy(grid_coords) self.grid_weights = torch.from_numpy(grid_weights) @@ -344,7 +339,7 @@ def __init__(self, grid_coords, grid_weights, basis_instructions, **kwargs): if len(species) < 3: W[species] = self.get_W(basis_instructions[species]) - self.my_box = torch.Tensor([[0, 1] for i in range(3)]) + self.my_box = torch.Tensor([[0, 1] for _ in range(3)]) self.unitcell = self.grid_coords self.grid = self.grid_weights diff --git a/neuralxc/projector/pyscf.py b/neuralxc/projector/pyscf.py index 77c9f95..be874e4 100644 --- a/neuralxc/projector/pyscf.py +++ b/neuralxc/projector/pyscf.py @@ -30,7 +30,7 @@ def get_eri3c(mol, auxmol, op): elif op == 'delta': eri3c = pmol.intor('int3c1e_sph', shls_slice=(0, mol.nbas, 0, mol.nbas, mol.nbas, mol.nbas + auxmol.nbas)) else: - raise ValueError('Operator {} not implemented'.format(op)) + raise ValueError(f'Operator {op} not implemented') return eri3c.reshape(mol.nao_nr(), 
mol.nao_nr(), -1) @@ -137,8 +137,7 @@ def get_V(self, dEdC, **kwargs): dEdC = self.bp.unpad_basis(dEdC) if self.dfit: dEdC = np.linalg.solve(self.S_aux, dEdC) - V = contract('ijk, k', self.eri3c, dEdC) - return V + return contract('ijk, k', self.eri3c, dEdC) class BasisPadder(): @@ -155,15 +154,15 @@ def __init__(self, mol): # Find maximum angular momentum and n for each species for atom_idx, _ in enumerate(mol.atom_charges()): sym = mol.atom_pure_symbol(atom_idx) - if not sym in sym_cnt: + if sym not in sym_cnt: sym_cnt[sym] = 0 sym_idx[sym] = [] sym_idx[sym].append(atom_idx) sym_cnt[sym] += 1 - for ao_idx, label in enumerate(mol.ao_labels(fmt=False)): + for label in mol.ao_labels(fmt=False): sym = label[1] - if not sym in max_l: + if sym not in max_l: max_l[sym] = 0 max_n[sym] = 0 @@ -176,16 +175,24 @@ def __init__(self, mol): indexing_left = {sym: [] for sym in max_n} indexing_right = {sym: [] for sym in max_n} labels = mol.ao_labels() - for sym in max_n: + for sym, value in max_n.items(): for idx in sym_idx[sym]: indexing_left[sym].append([]) indexing_right[sym].append([]) - for n in range(1, max_n[sym] + 1): + for n in range(1, value + 1): for l in range(max_l[sym] + 1): - if any(['{} {} {}{}'.format(idx, sym, n, l_dict_inv[l]) in lab for lab in labels]): + if any( + f'{idx} {sym} {n}{l_dict_inv[l]}' in lab + for lab in labels + ): indexing_left[sym][-1] += [True] * (2 * l + 1) - sidx = np.where(['{} {} {}{}'.format(idx, sym, n, l_dict_inv[l]) in lab - for lab in labels])[0][0] + sidx = np.where( + [ + f'{idx} {sym} {n}{l_dict_inv[l]}' in lab + for lab in labels + ] + )[0][0] + indexing_right[sym][-1] += np.arange(sidx, sidx + (2 * l + 1)).astype(int).tolist() else: indexing_left[sym][-1] += [False] * (2 * l + 1) @@ -198,10 +205,11 @@ def __init__(self, mol): def get_basis_json(self): - basis = {} + basis = { + sym: {'n': self.max_n[sym], 'l': self.max_l[sym] + 1} + for sym in self.sym_cnt + } - for sym in self.sym_cnt: - basis[sym] = {'n': self.max_n[sym], 'l': self.max_l[sym] + 1} if 'O' in basis: basis['X'] = {'n': self.max_n['O'], 'l': self.max_l['O'] + 1} diff --git a/neuralxc/pyscf/pyscf.py b/neuralxc/pyscf/pyscf.py index afa32c7..6ac7fc7 100644 --- a/neuralxc/pyscf/pyscf.py +++ b/neuralxc/pyscf/pyscf.py @@ -26,7 +26,7 @@ def RKS(mol, nxc='', **kwargs): that uses a NeuralXC potential """ mf = dft.RKS(mol, **kwargs) - if not nxc is '': + if nxc != '': model = neuralxc.PySCFNXC(nxc) model.initialize(mol) mf.get_veff = veff_mod(mf, model) @@ -44,8 +44,8 @@ def compute_KS(atoms, path='pyscf.chkpt', basis='ccpvdz', xc='PBE', nxc='', **kw mol = gto.M(atom=mol_input, basis=basis) # mol.verbose= 4 if nxc: - model_paths = glob(nxc + '/*') - if any(['projector' in path for path in model_paths]): + model_paths = glob(f'{nxc}/*') + if any('projector' in path for path in model_paths): mf = RKSrad(mol, nxc=nxc, nxc_kind='atomic') # Model that uses projector on radial grid else: mf = RKS(mol, nxc=nxc) # Model that uses overlap integrals and density matrix diff --git a/neuralxc/symmetrizer/symmetrizer.py b/neuralxc/symmetrizer/symmetrizer.py index f3e54e1..0237a85 100644 --- a/neuralxc/symmetrizer/symmetrizer.py +++ b/neuralxc/symmetrizer/symmetrizer.py @@ -23,10 +23,7 @@ def wrapped_func(X, *args, **kwargs): X = torch.from_numpy(X) made_tensor = True Y = func(X, *args, **kwargs) - if made_tensor: - return Y.detach().numpy() - else: - return Y + return Y.detach().numpy() if made_tensor else Y return wrapped_func @@ -45,14 +42,14 @@ def Symmetrizer(symmetrize_instructions): sym_ins = 
symmetrize_instructions registry = BaseSymmetrizer.get_registry() - if not 'symmetrizer_type' in sym_ins: + if 'symmetrizer_type' not in sym_ins: raise Exception('symmetrize_instructions must contain symmetrizer_type key') symtype = sym_ins['symmetrizer_type'] print('Using symmetrizer ', symtype) - if not symtype in registry: - raise Exception('Symmetrizer: {} not registered'.format(symtype)) + if symtype not in registry: + raise Exception(f'Symmetrizer: {symtype} not registered') return registry[symtype](sym_ins) @@ -77,7 +74,7 @@ def forward(self, C): return BaseSymmetrizer.get_symmetrized(self, C) @abstractmethod - def _symmetrize_function(c, n_l, n, *args): + def _symmetrize_function(self, n_l, n, *args): pass def get_params(self, *args, **kwargs): @@ -92,11 +89,10 @@ def transform(self, X, y=None): self._attrs.update({'basis': X['basis_instructions']}) X = X['data'] - if isinstance(X, tuple): - symmetrized = self.get_symmetrized(X[0]) - return symmetrized, X[1] - else: + if not isinstance(X, tuple): return self.get_symmetrized(X) + symmetrized = self.get_symmetrized(X[0]) + return symmetrized, X[1] def get_symmetrized(self, C): """ @@ -122,10 +118,7 @@ def get_symmetrized(self, C): results[idx][key] = self._symmetrize_function(*data, basis[key]['l'], basis[key]['n'] * grad_mult, self._cgs) - if not isinstance(C, list): - return results[0] - else: - return results + return results if isinstance(C, list) else results[0] class TraceSymmetrizer(BaseSymmetrizer): @@ -167,7 +160,7 @@ def _symmetrize_function(c, n_l, n, *args): traces = [] idx = 0 - for n_ in range(0, n): + for _ in range(n): for l in range(n_l): traces.append(torch.norm(c[:, idx:idx + (2 * l + 1)], dim=1)**2) idx += 2 * l + 1 @@ -215,12 +208,12 @@ def _symmetrize_function(c, n_l, n, *args): c = c.view(len(c), n, -1) traces = [] - for n1 in range(0, n): + for n1 in range(n): for n2 in range(n1, n): idx = 0 for l in range(n_l): traces.append(torch.sum(c[:,n1,idx:idx+(2*l+1)]*\ - c[:,n2,idx:idx+(2*l+1)], + c[:,n2,idx:idx+(2*l+1)], dim = -1)) idx += 2 * l + 1 diff --git a/neuralxc/tests/test_drivers.py b/neuralxc/tests/test_drivers.py index 3e38ad6..1124842 100644 --- a/neuralxc/tests/test_drivers.py +++ b/neuralxc/tests/test_drivers.py @@ -16,7 +16,7 @@ test_dir = os.path.dirname(os.path.abspath(__file__)) if 'driver_data_tmp' in os.listdir(test_dir): - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') def shcopytree(src, dest): @@ -31,25 +31,25 @@ def shcopytree(src, dest): @pytest.mark.driver_fit def test_fit(): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') # Fit model fit_driver(preprocessor='pre.json', hyper='hyper.json', sets='sets.inp', hyperopt=True) # Continue training fit_driver(preprocessor='pre.json', hyper='hyper.json', model='best_model', sets='sets.inp') os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') @pytest.mark.driver @pytest.mark.driver_fit def test_eval(): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') eval_driver(hdf5=['data.hdf5', 'system/it1', 'system/ref']) 
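The driver tests touched above all repeat the same sequence: copy driver_data to driver_data_tmp, chdir into the copy, run a driver, chdir back, and remove the copy. As a hedged illustration only (not part of this patch), that sequence could be wrapped in a small context manager; the name scratch_copy and the use of shutil.copytree in place of the project's shcopytree helper are assumptions made for the sketch.

import os
import shutil
from contextlib import contextmanager

@contextmanager
def scratch_copy(src, dest):
    # Copy the test data, work inside the copy, and always clean up afterwards.
    shutil.copytree(src, dest)
    cwd = os.getcwd()
    os.chdir(dest)
    try:
        yield dest
    finally:
        os.chdir(cwd)
        shutil.rmtree(dest)

# Hypothetical usage mirroring test_eval:
# with scratch_copy(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp'):
#     eval_driver(hdf5=['data.hdf5', 'system/it1', 'system/ref'])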
eval_driver(model='model_old', hdf5=['data.hdf5', 'system/it0', 'system/ref']) @@ -57,7 +57,7 @@ def test_eval(): eval_driver(model='model_old', hdf5=['data.hdf5', 'system/it0', 'system/ref'], predict=True, dest='prediction') os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') @pytest.mark.driver @@ -65,9 +65,9 @@ def test_eval(): def test_data(): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') add_data_driver(hdf5='data.hdf5', system='system', @@ -91,16 +91,16 @@ def test_data(): sample_driver(preprocessor='pre.json', size=5, dest='sample.npy', hdf5=['data.hdf5', 'system/it0']) os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') def test_serialize(): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') serialize('model', 'model.jit', False) os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') diff --git a/neuralxc/tests/test_neuralxc.py b/neuralxc/tests/test_neuralxc.py index 2d440f8..2ad5b9c 100644 --- a/neuralxc/tests/test_neuralxc.py +++ b/neuralxc/tests/test_neuralxc.py @@ -81,7 +81,7 @@ def test_formatter(): os.path.join(test_dir, 'var09.pckl')]]) def test_grouped_transformers(transformer, filepath): - for use_torch in [False, True] if torch_found else [False]: + for _ in [False, True] if torch_found else [False]: with open(os.path.join(test_dir, 'transformer_in.pckl'), 'rb') as file: C = pickle.load(file) diff --git a/neuralxc/tests/test_projectors.py b/neuralxc/tests/test_projectors.py index d36d26d..8b186ff 100644 --- a/neuralxc/tests/test_projectors.py +++ b/neuralxc/tests/test_projectors.py @@ -78,11 +78,7 @@ def test_jacobs_projector(rad_type, grid_type): positions = np.array([[0.0, 0.0, 0.0], [-0.75846035, -0.59257417, 0.0], [0.75846035, -0.59257417, 0.0] ]) / xc.constants.Bohr - if grid_type == 'euclidean': - application = 'siesta' - else: - application = 'pyscf' - + application = 'siesta' if grid_type == 'euclidean' else 'pyscf' if rad_type == 'ortho': basis_instructions = {'basis': {'n': 2, 'l': 3, 'r_o': 1}, 'projector': rad_type, 'grid': grid_type, 'grad': 1} diff --git a/neuralxc/tests/test_pyscf.py b/neuralxc/tests/test_pyscf.py index 47a11aa..f0a2087 100644 --- a/neuralxc/tests/test_pyscf.py +++ b/neuralxc/tests/test_pyscf.py @@ -27,7 +27,7 @@ test_dir = os.path.dirname(os.path.abspath(__file__)) if 'driver_data_tmp' in os.listdir(test_dir): - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') def shcopytree(src, dest): @@ -56,20 +56,20 @@ def test_radial_model(): species=['O', 'H', 'H']) res = model.get_V(rho)[0] - assert np.allclose(res, np.load(test_dir + '/rad_energy.npy')) + assert np.allclose(res, np.load(f'{test_dir}/rad_energy.npy')) @pytest.mark.skipif(not pyscf_found, reason='requires pyscf') @pytest.mark.pyscf def test_pre(): try: - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') except: pass os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + 
shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') run_engine_driver('benzene_small.traj', 'pre_rad.json', workdir='workdir_engine') @@ -86,16 +86,16 @@ def test_pre(): with h5py.File('data.hdf5', 'r') as f: for hashkey in f['/test/test/density']: - data0 = f['/test/test/density/' + hashkey][:] + data0 = f[f'/test/test/density/{hashkey}'][:] for hashkey in f['/test/test1/density']: - data1 = f['/test/test1/density/' + hashkey][:] + data1 = f[f'/test/test1/density/{hashkey}'][:] for hashkey in f['/test/test2/density']: - data2 = f['/test/test2/density/' + hashkey][:] + data2 = f[f'/test/test2/density/{hashkey}'][:] assert data0.shape[-1] * 2 == data1.shape[-1] assert data0.shape[-1] * 4 == data2.shape[-1] os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') @pytest.mark.skipif(not pyscf_found, reason='requires pyscf') @@ -103,25 +103,31 @@ def test_pre(): @pytest.mark.parametrize('projector', ['ga_ana','ga_rad','or_rad', 'ga_ana_f','ga_rad_f']) def test_sc(projector): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') - sc_driver('water.traj', 'pre_sc_{}.json'.format(projector), - 'hyper.json', maxit=1, hyperopt=True) - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') + sc_driver( + 'water.traj', + f'pre_sc_{projector}.json', + 'hyper.json', + maxit=1, + hyperopt=True, + ) + + os.chdir(f'{test_dir}/driver_data_tmp') # engine = Engine('pyscf', nxc='testing/nxc.jit', basis='sto3g') # engine.compute(read('testing.traj', '0')) os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') + shutil.rmtree(f'{test_dir}/driver_data_tmp') def test_pyscf_radial(): os.chdir(test_dir) - shcopytree(test_dir + '/driver_data', test_dir + '/driver_data_tmp') + shcopytree(f'{test_dir}/driver_data', f'{test_dir}/driver_data_tmp') cwd = os.getcwd() - os.chdir(test_dir + '/driver_data_tmp') + os.chdir(f'{test_dir}/driver_data_tmp') serialize('model', 'benzene.pyscf.jit', as_radial=False) engine = Engine('pyscf', nxc='benzene.pyscf.jit') @@ -133,8 +139,7 @@ def test_pyscf_radial(): assert np.allclose(atoms_rad.get_potential_energy(), atoms.get_potential_energy()) os.chdir(cwd) - shutil.rmtree(test_dir + '/driver_data_tmp') - pass + shutil.rmtree(f'{test_dir}/driver_data_tmp') if __name__ == '__main__': diff --git a/neuralxc/tests/test_torch_comp.py b/neuralxc/tests/test_torch_comp.py index c43425a..443eb37 100644 --- a/neuralxc/tests/test_torch_comp.py +++ b/neuralxc/tests/test_torch_comp.py @@ -96,8 +96,8 @@ def test_stress(): forces = V_comp[1][1][:-3] stress_diag = [] + dx = 0.0001 for ij in range(3): - dx = 0.0001 energies = [] for ix in [-1, 1]: unitcell = np.array(unitcell_true) diff --git a/neuralxc/timer.py b/neuralxc/timer.py index 91dbc01..ceb110e 100644 --- a/neuralxc/timer.py +++ b/neuralxc/timer.py @@ -30,32 +30,32 @@ def __init__(self): def start(self, name, threadsafe=True): - if not (self.threaded and not threadsafe): + if not self.threaded or threadsafe: if name in self.cnt_dict: self.cnt_dict[name] += 1 else: self.cnt_dict[name] = 1 - if not name in self.start_dict: + if name not in self.start_dict: self.start_dict[name] = time.time() def stop(self, name, threadsafe=True): - if not (self.threaded and not threadsafe): - if 
name in self.start_dict: - dt = time.time() - self.start_dict[name] - if name in self.accum_dict: - self.accum_dict[name] += time.time() - self.start_dict[name] - self.max_dict[name] = max(self.max_dict[name], dt) - self.min_dict[name] = min(self.min_dict[name], dt) - else: - self.accum_dict[name] = time.time() - self.start_dict[name] - self.max_dict[name] = dt - self.min_dict[name] = dt - - self.start_dict.pop(name) - else: - raise ValueError('Timer with name {} was never started'.format(name)) + if (self.threaded and not threadsafe): + return + if name not in self.start_dict: + raise ValueError(f'Timer with name {name} was never started') + dt = time.time() - self.start_dict[name] + if name in self.accum_dict: + self.accum_dict[name] += time.time() - self.start_dict[name] + self.max_dict[name] = max(self.max_dict[name], dt) + self.min_dict[name] = min(self.min_dict[name], dt) + else: + self.accum_dict[name] = time.time() - self.start_dict[name] + self.max_dict[name] = dt + self.min_dict[name] = dt + + self.start_dict.pop(name) def create_report(self, path=None): # keys = list(self.start_dict.keys()) @@ -75,7 +75,4 @@ def create_report(self, path=None): print(report) -if config.UseTimer: - timer = Timer() -else: - timer = DummyTimer() +timer = Timer() if config.UseTimer else DummyTimer() diff --git a/neuralxc/utils/config.py b/neuralxc/utils/config.py index 2191351..f86341d 100644 --- a/neuralxc/utils/config.py +++ b/neuralxc/utils/config.py @@ -57,7 +57,7 @@ def find_projector_type(config): if pre["grid"] == "radial": ptype += "_radial" elif pre["grid"] == "analytical": - if not config["engine"]["application"] == "pyscf": + if config["engine"]["application"] != "pyscf": raise ValueError("Analytical projection only supported if application is PySCF") ptype = 'pyscf' elif pre["grid"] == "euclidean": @@ -76,12 +76,12 @@ def fix_basis(config): if 'ortho' in ptype: if not isinstance(pre['basis'], dict): raise ValueError('Dict expected for "basis"') - if any([isinstance(val, dict) for val in pre['basis'].values()]): - basis.update(pre['basis']) + if any(isinstance(val, dict) for val in pre['basis'].values()): + basis |= pre['basis'] else: basis['X'] = pre['basis'] agnostic = True - elif 'pyscf' == ptype or 'gaussian' in ptype: + elif ptype == 'pyscf' or 'gaussian' in ptype: if isinstance(pre['basis'], str): #Short-hand notation for PySCF basis sets basis['basis'] = {'name': pre['basis']} agnostic = False diff --git a/neuralxc/utils/density_getter.py b/neuralxc/utils/density_getter.py index 5e3a835..d4fe165 100644 --- a/neuralxc/utils/density_getter.py +++ b/neuralxc/utils/density_getter.py @@ -89,10 +89,7 @@ def get_density(self, file_path, return_dict=False): core += 1 results['mo_occ'][:core] = 0 - if return_dict: - return {'rho': res[0], 'mol': res[1], 'mf': res[2]} - else: - return res + return {'rho': res[0], 'mol': res[1], 'mf': res[2]} if return_dict else res class PySCFRadDensityGetter(BaseDensityGetter): @@ -175,7 +172,7 @@ def get_density_bin(file_path): else: raise Exception('get_data_bin cannot handle non-cubic unitcells or spin') - block = '<' + 'I{}fI'.format(a) * a * a + block = '<' + f'I{a}fI' * a * a content = np.array(struct.unpack(block, bin_file.read(struct.calcsize(block)))) rho = content.reshape(a + 2, a, a, order='F')[1:-1, :, :] @@ -206,7 +203,7 @@ def get_density_formatted(file_path): with open(file_path, 'r') as rhofile: # unit cell (in Bohr) - for i in range(0, 3): + for i in range(3): unitcell[i, :] = rhofile.readline().split() grid[:] = rhofile.readline().split() 
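The Timer.stop rewrite above replaces the nested if/else with guard clauses: return early when the timer is disabled, raise immediately for an unknown name, and keep the accumulation logic flat. A minimal sketch of that shape follows, using a stripped-down class; MiniTimer and its attributes are illustrative only, not the project's Timer.

import time

class MiniTimer:
    def __init__(self):
        self.start_dict = {}
        self.accum_dict = {}

    def start(self, name):
        self.start_dict.setdefault(name, time.time())

    def stop(self, name):
        # Guard clause: fail fast instead of nesting the happy path.
        if name not in self.start_dict:
            raise ValueError(f'Timer with name {name} was never started')
        dt = time.time() - self.start_dict.pop(name)
        self.accum_dict[name] = self.accum_dict.get(name, 0.0) + dt
        return dt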
@@ -242,10 +239,7 @@ def get_forces(self, path, n_atoms=-1): if f == 'siesta:': continue forces.append(float(f)) forces = np.array(forces).reshape(-1, 3) - if n_atoms == -1: - return forces - else: - return forces[:n_atoms] + return forces if n_atoms == -1 else forces[:n_atoms] def density_getter_factory(application, *args, **kwargs): @@ -265,7 +259,7 @@ def density_getter_factory(application, *args, **kwargs): registry = BaseDensityGetter.get_registry() - if not application in registry: - raise Exception('DensityGetter: {} not registered'.format(application)) + if application not in registry: + raise Exception(f'DensityGetter: {application} not registered') return registry[application](*args, **kwargs) diff --git a/neuralxc/utils/geom.py b/neuralxc/utils/geom.py index 2f86a78..d6f0a23 100644 --- a/neuralxc/utils/geom.py +++ b/neuralxc/utils/geom.py @@ -9,7 +9,7 @@ def associated_legendre_polynomial(l, m, x, pmm, pll): if m > 0: somx2 = torch.sqrt((1 - x) * (1 + x)) fact = 1.0 - for i in range(1, m + 1): + for _ in range(1, m + 1): pmm = pmm * (-fact) * somx2 fact += 2.0 if l == m: diff --git a/neuralxc/utils/visualize.py b/neuralxc/utils/visualize.py index 241d988..7b18978 100644 --- a/neuralxc/utils/visualize.py +++ b/neuralxc/utils/visualize.py @@ -57,7 +57,4 @@ def _plane_cut(data, plane, height, grid, rmax=0, return_mesh=False): indeces = [A, B] indeces.insert(plane, height) - if not return_mesh: - return data[indeces[0], indeces[1], indeces[2]] - else: - return A, B + return (A, B) if return_mesh else data[indeces[0], indeces[1], indeces[2]] diff --git a/versioneer.py b/versioneer.py index fc6fc62..10edddc 100644 --- a/versioneer.py +++ b/versioneer.py @@ -325,7 +325,10 @@ def get_root(): me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py)) + print( + f"Warning: build in {os.path.dirname(me)} is using versioneer.py from {versioneer_py}" + ) + except NameError: pass return root @@ -401,20 +404,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= if e.errno == errno.ENOENT: continue if verbose: - print("unable to run %s" % dispcmd) + print(f"unable to run {dispcmd}") print(e) return None, None else: if verbose: - print("unable to find command, tried %s" % (commands, )) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) + print(f"unable to run {dispcmd} (error)") + print(f"stdout was {stdout}") return None, p.returncode return stdout, p.returncode @@ -951,21 +954,17 @@ def git_get_keywords(versionfile_abs): # _version.py. 
keywords = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() + with open(versionfile_abs, "r") as f: + for line in f: + if line.strip().startswith("git_refnames ="): + if mo := re.search(r'=\s*"(.*)"', line): + keywords["refnames"] = mo[1] + if line.strip().startswith("git_full ="): + if mo := re.search(r'=\s*"(.*)"', line): + keywords["full"] = mo[1] + if line.strip().startswith("git_date ="): + if mo := re.search(r'=\s*"(.*)"', line): + keywords["date"] = mo[1] except EnvironmentError: pass return keywords @@ -990,11 +989,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1003,17 +1002,17 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) + print(f'likely tags: {",".join(sorted(tags))}') for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: - print("picking %s" % r) + print(f"picking {r}") return { "version": r, "full-revisionid": keywords["full"].strip(), @@ -1041,21 +1040,29 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. 
""" - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - + GITS = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: - print("Directory %s not under git control" % root) + print(f"Directory {root} not under git control") raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command( - GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", - "%s*" % tag_prefix], cwd=root) + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -1065,11 +1072,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - + pieces = {"long": full_out, "short": full_out[:7], "error": None} # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -1091,7 +1094,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): return pieces # tag - full_tag = mo.group(1) + full_tag = mo[1] if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" @@ -1101,10 +1104,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) + pieces["distance"] = int(mo[2]) # commit: short hex revision ID - pieces["short"] = mo.group(3) + pieces["short"] = mo[3] else: # HEX: no tags @@ -1125,34 +1128,32 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. 
""" - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] + GITS = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"] files = [manifest_in, versionfile_source] if ipy: files.append(ipy) try: me = __file__ if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" + me = f"{os.path.splitext(me)[0]}.py" versioneer_file = os.path.relpath(me) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: + with open(".gitattributes", "r") as f: + for line in f: + if ( + line.strip().startswith(versionfile_source) + and "export-subst" in line.strip().split()[1:] + ): present = True - f.close() except EnvironmentError: pass if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() + with open(".gitattributes", "a+") as f: + f.write("%s export-subst\n" % versionfile_source) files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) @@ -1166,7 +1167,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return { @@ -1176,12 +1177,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): "error": None, "date": None } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) + print( + f"Tried directories {rootdirs} but none started with prefix {parentdir_prefix}" + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -1215,7 +1218,7 @@ def versions_from_file(filename): mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) + return json.loads(mo[1]) def write_to_version_file(filename, versions): @@ -1230,9 +1233,7 @@ def write_to_version_file(filename, versions): def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" + return "." 
if "+" in pieces.get("closest-tag", "") else "+" def render_pep440(pieces): @@ -1249,13 +1250,11 @@ def render_pep440(pieces): if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" + if pieces["dirty"]: + rendered += ".dirty" return rendered @@ -1292,13 +1291,13 @@ def render_pep440_post(pieces): if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] + rendered += f'g{pieces["short"]}' else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" - rendered += "+g%s" % pieces["short"] + rendered += f'+g{pieces["short"]}' return rendered @@ -1314,13 +1313,11 @@ def render_pep440_old(pieces): rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" + if pieces["dirty"]: + rendered += ".dev0" return rendered @@ -1441,7 +1438,7 @@ def get_versions(verbose=False): keywords = get_keywords_f(versionfile_abs) ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) if verbose: - print("got version from expanded keyword %s" % ver) + print(f"got version from expanded keyword {ver}") return ver except NotThisMethod: pass @@ -1449,18 +1446,17 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: + if from_vcs_f := handlers.get("pieces_from_vcs"): try: pieces = from_vcs_f(cfg.tag_prefix, root, verbose) ver = render(pieces, cfg.style) if verbose: - print("got version from VCS %s" % ver) + print(f"got version from VCS {ver}") return ver except NotThisMethod: pass @@ -1469,7 +1465,7 @@ def get_versions(verbose=False): if cfg.parentdir_prefix: ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) if verbose: - print("got version from parentdir %s" % ver) + print(f"got version from parentdir {ver}") return ver except NotThisMethod: pass @@ -1508,11 +1504,11 @@ def get_cmdclass(): # happens, we protect the child from the parent's versioneer too. 
# Also see https://github.com/warner/python-versioneer/issues/52 - cmds = {} - # we add "version" to both distutils and setuptools from distutils.core import Command + + class cmd_version(Command): description = "report generated version string" user_options = [] @@ -1526,14 +1522,13 @@ def finalize_options(self): def run(self): vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) + print(f'Version: {vers["version"]}') + print(f' full-revisionid: {vers.get("full-revisionid")}') + print(f' dirty: {vers.get("dirty")}') + print(f' date: {vers.get("date")}') if vers["error"]: - print(" error: %s" % vers["error"]) + print(f' error: {vers["error"]}') - cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools # @@ -1556,6 +1551,8 @@ def run(self): else: from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): def run(self): root = get_root() @@ -1566,20 +1563,15 @@ def run(self): # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) + print(f"UPDATING {target_versionfile}") write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py + cmds = {"version": cmd_version, "build_py": cmd_build_py} if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... 
+ class cmd_build_exe(_build_exe): def run(self): @@ -1587,7 +1579,7 @@ def run(self): cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) + print(f"UPDATING {target_versionfile}") write_to_version_file(target_versionfile, versions) _build_exe.run(self) @@ -1603,6 +1595,7 @@ def run(self): "VERSIONFILE_SOURCE": cfg.versionfile_source, }) + cmds["build_exe"] = cmd_build_exe del cmds["build_py"] @@ -1612,13 +1605,15 @@ def run(self): except ImportError: from py2exe.build_exe import py2exe as _py2exe # py2 + + class cmd_py2exe(_py2exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) + print(f"UPDATING {target_versionfile}") write_to_version_file(target_versionfile, versions) _py2exe.run(self) @@ -1634,6 +1629,7 @@ def run(self): "VERSIONFILE_SOURCE": cfg.versionfile_source, }) + cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments @@ -1642,6 +1638,8 @@ def run(self): else: from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): def run(self): versions = get_versions() @@ -1659,9 +1657,10 @@ def make_release_tree(self, base_dir, files): # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) + print(f"UPDATING {target_versionfile}") write_to_version_file(target_versionfile, self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist return cmds @@ -1724,7 +1723,7 @@ def do_setup(): print(CONFIG_ERROR, file=sys.stderr) return 1 - print(" creating %s" % cfg.versionfile_source) + print(f" creating {cfg.versionfile_source}") with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write( @@ -1744,11 +1743,11 @@ def do_setup(): except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) + print(f" appending to {ipy}") with open(ipy, "a") as f: f.write(INIT_PY_SNIPPET) else: - print(" %s unmodified" % ipy) + print(f" {ipy} unmodified") else: print(" %s doesn't exist, ok" % ipy) ipy = None @@ -1797,7 +1796,7 @@ def scan_setup_py(): setters = False errors = 0 with open("setup.py", "r") as f: - for line in f.readlines(): + for line in f: if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line:
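Several hunks in this patch (git_get_keywords, do_vcs_install, scan_setup_py) switch from open()/f.readlines()/f.close() to a with block that iterates the file object directly, and git_get_keywords additionally folds the re.search match into an assignment expression. A hedged sketch of the combined pattern is shown below; the function name, file contents, and regular expression are invented for illustration, and the := operator assumes Python 3.8 or newer.

import re

def read_keywords(path):
    # Iterate the file lazily inside a with block; no explicit close() needed.
    keywords = {}
    with open(path, "r") as f:
        for line in f:
            if mo := re.search(r'(\w+)\s*=\s*"(.*)"', line):
                keywords[mo[1]] = mo[2]
    return keywords

# e.g. read_keywords("keywords.txt")  ->  {'git_refnames': '...', ...}  (hypothetical file)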