172 lines
6.5 KiB
Python
172 lines
6.5 KiB
Python
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
import re
|
||
|
|
from monty.re import regrep
|
||
|
|
from tqdm import tqdm
|
||
|
|
from .universal import Xyz
|
||
|
|
from nptyping import NDArray, Shape, Number
|
||
|
|
|
||
|
|
|
||
|
|
class SCFLog:
    """Orbital eigenvalues, occupations and MO tables read from an SCF log.

    Every attribute holds exactly what was passed to the constructor and may
    be ``None`` when the corresponding section was not read.
    """

    def __init__(self, eigenvalues=None, occupation=None, mol_orbs=None):
        """Store the parsed data without copying or validating it.

        Args:
            eigenvalues: Orbital energies indexed as ``[step][band]``
                (``from_file`` passes a NumPy array), or ``None``.
            occupation: Orbital occupations with the same layout, or ``None``.
                Note that it is stored as ``self.occupations`` (plural).
            mol_orbs: Sequence with one ``pandas.DataFrame`` per step, or
                ``None``. As built by ``from_file`` each frame has the columns
                ``atom_ids``, ``species``, ``orbital`` followed by one column
                per molecular orbital.
        """
        self.eigenvalues = eigenvalues
        self.occupations = occupation
        self.mol_orbs = mol_orbs

    @property
    def natoms(self):
        """Number of atoms: largest ``atom_ids`` value of the first step + 1.

        Raises:
            ValueError: If ``mol_orbs`` has not been read.
        """
        if self.mol_orbs is None:
            raise ValueError('natoms might be calculated only if mol_orbs had been read')
        # The "+ 1" treats atom ids as zero-based indices.
        return np.max(self.mol_orbs[0]['atom_ids']) + 1

    @property
    def nbands(self):
        """Number of orbitals per step.

        Taken from the first row of ``eigenvalues`` when available, otherwise
        from the first ``mol_orbs`` frame.

        Raises:
            ValueError: If neither ``eigenvalues`` nor ``mol_orbs`` is set.
        """
        if self.eigenvalues is not None:
            return len(self.eigenvalues[0])
        if self.mol_orbs is not None:
            # Three leading columns (atom_ids, species, orbital) are labels,
            # every remaining column is one orbital.
            return len(self.mol_orbs[0].columns) - 3
        raise ValueError('nbands might be calculated only if eigenvalues or mol_orbs had been read')

    @property
    def nsteps(self):
        """Number of steps (one per parsed section occurrence).

        Raises:
            ValueError: If neither ``eigenvalues`` nor ``mol_orbs`` is set.
        """
        if self.eigenvalues is not None:
            return len(self.eigenvalues)
        if self.mol_orbs is not None:
            return len(self.mol_orbs)
        raise ValueError('nsteps might be calculated only if eigenvalues or mol_orbs had been read')

    @staticmethod
    def from_file(filepath):
        """Parse every 'ORBITAL ENERGIES' and 'MOLECULAR ORBITALS' section.

        Args:
            filepath: Path of the log file (presumably an ORCA output; the
                layout is inferred from the parsing below, TODO confirm).

        Returns:
            SCFLog: ``eigenvalues`` and ``occupations`` are
            ``np.array`` conversions of the per-section lists, ``mol_orbs`` is
            a list with one ``pandas.DataFrame`` per section.
        """
        with open(filepath, 'r') as file:
            data = file.readlines()

        patterns = {'eigs': 'ORBITAL ENERGIES',
                    'mos': 'MOLECULAR ORBITALS'}
        # match[1] of every hit is used below as the index of the heading
        # line inside ``data``.
        matches = regrep(filepath, patterns)

        occs = []
        eigs = []
        for match in tqdm(matches['eigs'], desc='Eigenvalues', total=len(matches['eigs'])):
            eigs_tmp = []
            occs_tmp = []
            # The table body starts four lines below the heading and ends at
            # a blank line or a line of exactly 18 dashes.
            i = match[1] + 4
            while data[i] != '\n' and data[i] != '------------------\n':
                line = data[i].split()
                occs_tmp.append(float(line[1]))  # second field: occupation
                eigs_tmp.append(float(line[3]))  # fourth field: energy
                i += 1
            occs.append(occs_tmp)
            eigs.append(eigs_tmp)

        # Both patterns are loop invariants, so compile them once.
        # Coefficient row: "<atom label> <orbital label> <float> <float> ...".
        coeff_row = re.compile(r'\s*\w+\s+\w+\s+([-+]?\d*\.\d*\s+)+')
        # Header row: integers only (the orbital numbers of one column batch).
        header_row = re.compile(r'\s*(\d+\s+)+')

        mos_arr = []
        for match in tqdm(matches['mos'], desc='Molecular Orbitals', total=len(matches['mos'])):
            df = pd.DataFrame()
            # Tri-state flag:
            #   None  - no header row seen yet
            #   False - inside the first batch (label columns still buffered)
            #   True  - label columns already written to ``df``
            first_columns_appended = None
            last_batch_added = False
            # Column buffers of the current batch; element 0 of each buffer is
            # the column name, the rest are the values. Reset per section so a
            # section without any header row cannot reuse stale buffers.
            mos_tmp = []
            i = match[1] + 2

            while data[i] != '\n' and data[i] != '------------------\n':
                if coeff_row.match(data[i]) is not None:
                    last_batch_added = False
                    line = data[i].split()
                    if first_columns_appended is False:
                        # First batch: also collect atom id, species and
                        # orbital label. The first field is "<digits><symbol>".
                        atom_number = re.match(r'\d+', line[0])
                        mos_tmp[0].append(int(atom_number[0]))
                        atom_symbol = line[0][len(atom_number[0]):]
                        mos_tmp[1].append(atom_symbol)
                        orbital = line[1]
                        mos_tmp[2].append(orbital)
                        for j, value in enumerate(line[2:]):
                            mos_tmp[3 + j].append(float(value))
                    elif first_columns_appended is True:
                        for j, value in enumerate(line[2:]):
                            mos_tmp[j].append(float(value))
                    # A coefficient row before any header row is skipped.

                elif header_row.match(data[i]) is not None:
                    line = data[i].split()
                    if first_columns_appended is False:
                        # End of the first batch: flush labels and orbitals.
                        first_columns_appended = True
                        last_batch_added = True
                        df['atom_ids'] = mos_tmp[0][1:]
                        df['species'] = mos_tmp[1][1:]
                        df['orbital'] = mos_tmp[2][1:]
                        for column in mos_tmp[3:]:
                            df[column[0]] = column[1:]
                        mos_tmp = [[int(n_mo)] for n_mo in line]
                    elif first_columns_appended is None:
                        # Very first header: three placeholder-named label
                        # buffers followed by one buffer per orbital number.
                        last_batch_added = True
                        mos_tmp = [[''], [''], ['']] + [[int(n_mo)] for n_mo in line]
                        first_columns_appended = False
                    else:
                        # Later header: flush the previous batch, start anew.
                        last_batch_added = True
                        for column in mos_tmp:
                            df[column[0]] = column[1:]
                        mos_tmp = [[int(n_mo)] for n_mo in line]

                # Lines matching neither pattern are skipped. Advancing here,
                # once per iteration, guarantees that the loop terminates.
                i += 1

            if not last_batch_added:
                # The section ended on coefficient rows: flush the open batch.
                for column in mos_tmp:
                    df[column[0]] = column[1:]

            mos_arr.append(df)

        return SCFLog(np.array(eigs), np.array(occs), mos_arr)
|
||
|
|
|
||
|
|
|
||
|
|
class XyzTrajectory:
    """Sequence of atomic coordinates and one energy value per frame."""

    def __init__(self,
                 first_xyz: Xyz,
                 trajectory: NDArray[Shape['Nsteps, Natoms, 3'], Number],
                 energies_pot: NDArray[Shape['Nsteps'], Number]):
        """Store the trajectory data without copying or validating it.

        Args:
            first_xyz: ``Xyz`` object; ``from_file`` passes the result of
                ``Xyz.from_file`` called on the same path.
            trajectory: Coordinates with shape ``(Nsteps, Natoms, 3)``.
            energies_pot: One value per step, shape ``(Nsteps,)``.
        """
        self.first_xyz = first_xyz
        self.trajectory = trajectory
        self.energies_pot = energies_pot

    @staticmethod
    def from_file(filepath):
        """Read all frames of a multi-frame xyz-like file.

        Each frame is expected to consist of an atom-count line, a comment
        line whose sixth whitespace-separated field is the energy, and then
        one line per atom with a label followed by the coordinates.

        Args:
            filepath: Path of the trajectory file.

        Returns:
            XyzTrajectory: Built from ``Xyz.from_file(filepath)`` and the
            stacked coordinates and energies of every frame.
        """
        first_xyz = Xyz.from_file(filepath)

        trajectory = []
        energies_pot = []
        with open(filepath, 'rt') as file:
            while True:
                # End of file (empty string) or a non-numeric line ends the
                # trajectory. Only ValueError is caught so that interrupts
                # and I/O errors are no longer swallowed.
                try:
                    natoms = int(file.readline().strip())
                except ValueError:
                    break

                line = file.readline()
                energies_pot.append(float(line.split()[5]))
                # NOTE(review): an earlier variant read the energy from field
                # 8 as "key=value" (float(line.split()[8].split('=')[1]));
                # confirm which comment-line layout the input files use.

                coords = np.zeros((natoms, 3))
                for i in range(natoms):
                    fields = file.readline().split()
                    # The first field is the atom label, the rest coordinates.
                    coords[i] = [float(value) for value in fields[1:]]
                trajectory.append(coords)

        return XyzTrajectory(first_xyz, np.array(trajectory), np.array(energies_pot))
|