aton.api.pwx
Description
Tools to work with the pw.x module from Quantum ESPRESSO.
Index
Input and output reading
read_in() |
Read an input file as a Python dict |
read_out() |
Read an output file as a Python dict |
read_dir() |
Read the input and output from a directory and return a dict |
read_dirs() |
Read all inputs and outputs from all subfolders, and save to CSVs |
Input file manipulation
set_value() |
Replace the value of a specific parameter from an input file |
set_values() |
Replace the value of multiple specific parameters from an input file |
add_atom() |
Add an atom to a given input file |
resume() |
Update an input file with the atomic coordinates of a previous output file |
scf_from_relax() |
Create a scf.in from a previous relax calculation |
Data analysis
get_atom() |
Take the approximate position of an atom and return the full coordinates |
count_elements() |
Take the ATOMIC_POSITIONS and return a dict as {element: number} |
normalize_card() |
Take a matched card, and return it in a normalized format |
consts_from_cell_parameters() |
Get the lattice parameters from the CELL_PARAMETERS matrix |
to_cartesian() |
Convert coordinates from crystal lattice vectors to cartesian |
from_cartesian() |
Convert coordinates from cartesian to the base of lattice vectors |
Dicts with input file description
pw_namelists |
All possible NAMELISTS as keys, and the corresponding variables as values |
pw_cards |
All possible CARDs as keys, and the corresponding variables as values |
1""" 2# Description 3 4Tools to work with the [pw.x](https://www.quantum-espresso.org/Doc/INPUT_PW.html) module from [Quantum ESPRESSO](https://www.quantum-espresso.org/). 5 6 7# Index 8 9 10## Input and output reading 11 12| | | 13| --- | --- | 14| `read_in()` | Read an input file as a Python dict | 15| `read_out()` | Read an output file as a Python dict | 16| `read_dir()` | Read the input and output from a directory and return a dict | 17| `read_dirs()` | Read all inputs and outputs from all subfolders, and save to CSVs | 18 19 20## Input file manipulation 21 22| | | 23| --- | --- | 24| `set_value()` | Replace the value of a specific parameter from an input file | 25| `set_values()` | Replace the value of multiple specific parameters from an input file | 26| `add_atom()` | Add an atom to a given input file | 27| `resume()` | Update an input file with the atomic coordinates of a previous output file | 28| `scf_from_relax()` | Create a scf.in from a previous relax calculation | 29 30 31## Data analysis 32 33| | | 34| --- | --- | 35| `get_atom()` | Take the approximate position of an atom and return the full coordinates | 36| `count_elements()` | Take the ATOMIC_POSITIONS and return a dict as {element: number} | 37| `normalize_card()` | Take a matched card, and return it in a normalized format | 38| `consts_from_cell_parameters()` | Get the lattice parameters from the CELL_PARAMETERS matrix | 39| `to_cartesian()` | Convert coordinates from crystal lattice vectors to cartesian | 40| `from_cartesian()` | Convert coordinates from cartesian to the base of lattice vectors | 41 42 43## Dicts with input file description 44 45| | | 46| --- | --- | 47`pw_namelists` | All possible NAMELISTS as keys, and the corresponding variables as values | 48`pw_cards` | All possible CARDs as keys, and the corresponding variables as values | 49 50--- 51""" 52 53 54import pandas as pd 55import numpy as np 56import os 57from aton._version import __version__ 58import aton.file as file 59import aton.txt.find as find 60import aton.txt.edit as edit 61import aton.txt.extract as extract 62import periodictable 63from scipy.constants import physical_constants 64from copy import deepcopy 65 66 67# Handy conversion factors 68_BOHR_TO_ANGSTROM = physical_constants['Bohr radius'][0] * 1e10 69 70 71def read_in(filepath) -> dict: 72 """Reads a Quantum ESPRESSO input `filepath` and returns the values as a dict. 73 74 Dict keys are named after the corresponding variable. 75 CARDS are returned as lists, and contain the 76 title card + parameters in the first item. 77 """ 78 file_path = file.get(filepath) 79 data: dict = {} 80 # First get the values from the namelists 81 lines = find.lines(file_path, '=') 82 for line in lines: 83 line.strip() 84 var, value = line.split('=', 1) 85 var = var.strip() 86 value = value.strip() 87 if var.startswith('!'): 88 continue 89 try: 90 value_float = value.replace('d', 'e') 91 value_float = value_float.replace('D', 'e') 92 value_float = value_float.replace('E', 'e') 93 value_float = float(value_float) 94 value = value_float 95 if var in _pw_int_values: # Keep ints as int 96 value = int(value) 97 except ValueError: 98 pass # Then it is a string 99 data[var] = value 100 # Try to find all the cards. Card titles will be saved in the 0 position of each result. 101 for card in pw_cards.keys(): 102 card_lower = card.lower() 103 card_uncommented = rf'(?!\s*!\s*)({card}|{card_lower})' # Legacy regex 104 card_content = find.between(filepath=file_path, key1=card_uncommented, key2=_all_cards_regex, include_keys=True, match=-1, regex=True) 105 if not card_content: 106 continue 107 # If found, clean and normalise the card's content 108 card_content = card_content.splitlines() 109 card_content = normalize_card(card_content) 110 # Ignore OCCUPATIONS card if not required to avoid saving nonsense data 111 if card_lower == 'occupations': 112 if data.get('occupations') is None or 'from_input' not in data.get('occupations'): 113 continue 114 data[card] = card_content 115 # If there are CELL_PARAMETERS, check if we can extract the alat to celldm(1). 116 if 'CELL_PARAMETERS' in data.keys(): 117 if 'alat' in data['CELL_PARAMETERS'][0]: 118 alat = extract.number(data['CELL_PARAMETERS'][0]) 119 if alat: # This overwrites any possible celldm(1) previously defined! 120 data['celldm(1)'] = alat 121 data['CELL_PARAMETERS'][0] = 'CELL_PARAMETERS alat' 122 return data 123 124 125def read_out(filepath) -> dict: 126 """Reads a Quantum ESPRESSO output `filepath`, returns a dict with the output keys. 127 128 The output keys are: 129 `'Energy'` (Ry), `'Total force'` (float), `'Total SCF correction'` (float), 130 `'Runtime'` (str), `'Success'` (bool), `'JOB DONE'` (bool), 131 `'BFGS converged'` (bool), `'BFGS failed'` (bool), 132 `'Maxiter reached'` (bool), `'Error'` (str), `'Efermi'` (eV), 133 `'Alat'` (bohr), `'Volume'` (a.u.^3), `'Volume AA'` (AA^3), 134 `'Density'` (g/cm^3), `'Pressure'` (kbar), 135 `'CELL_PARAMETERS out'` (list of str), `'ATOMIC_POSITIONS out'` (list of str), 136 `'cosBC out'`, `'cosAC out'`, `'cosAB out'` (float), 137 `'A out'`, `'B out'`, `'C out'` (angstrom), 138 """ 139 file_path = file.get(filepath) 140 141 energy_key = '! total energy' 142 force_key = 'Total force' 143 scf_key = 'Total SCF correction' 144 pressure_key = 'P=' 145 efermi_key = 'the Fermi energy is' 146 time_key = 'PWSCF' 147 time_stop_key = 'CPU' 148 job_done_key = 'JOB DONE.' 149 bfgs_converged_key = 'bfgs converged' 150 bfgs_failed_key = 'bfgs failed' 151 maxiter_reached_key = r'(Maximum number of iterations reached|maximum number of steps has been reached)' 152 error_key = 'Error in routine' 153 error_failed_line = 'pw.x: Failed' 154 cell_parameters_key = 'CELL_PARAMETERS' 155 atomic_positions_key = 'ATOMIC_POSITIONS' 156 157 energy_line = find.lines(file_path, energy_key, -1) 158 force_line = find.lines(file_path, force_key, -1) 159 pressure_line = find.lines(file_path, pressure_key, -1) 160 efermi_line = find.lines(file_path, efermi_key, -1) 161 time_line = find.lines(file_path, time_key, -1) 162 job_done_line = find.lines(file_path, job_done_key, -1) 163 bfgs_converged_line = find.lines(file_path, bfgs_converged_key, -1) 164 bfgs_failed_line = find.lines(file_path, bfgs_failed_key, -1) 165 maxiter_reached_line = find.lines(file_path, maxiter_reached_key, -1, regex=True) 166 error_line = find.lines(file_path, error_key, -1, 1, True) 167 error_failed_line = find.lines(file_path, error_failed_line, -1) 168 169 energy: float = None 170 force: float = None 171 scf: float = None 172 pressure: float = None 173 efermi: float = None 174 time: str = None 175 job_done: bool = False 176 bfgs_converged: bool = False 177 bfgs_failed: bool = False 178 maxiter_reached: bool = False 179 error: str = '' 180 success: bool = False 181 182 if energy_line: 183 energy = extract.number(energy_line[0], energy_key) 184 if force_line: 185 force = extract.number(force_line[0], force_key) 186 scf = extract.number(force_line[0], scf_key) 187 if pressure_line: 188 pressure = extract.number(pressure_line[0], pressure_key) 189 if efermi_line: 190 efermi = extract.number(efermi_line[0], efermi_key) 191 if time_line: 192 time = extract.string(time_line[0], time_key, time_stop_key) 193 if job_done_line: 194 job_done = True 195 if bfgs_converged_line: 196 bfgs_converged = True 197 if bfgs_failed_line: 198 bfgs_failed = True 199 if maxiter_reached_line: 200 maxiter_reached = True 201 if error_line: 202 error = error_line[1].strip() 203 elif error_failed_line: 204 error = error_failed_line[0].strip() 205 206 # Was the calculation successful? 207 if job_done and not bfgs_failed and not maxiter_reached and not error: 208 success = True 209 210 # CELL_PARAMETERS and ATOMIC_POSITIONS 211 alat = None 212 volume = None 213 volume_AA = None 214 density = None 215 cell_parameters = None 216 atomic_positions = None 217 cell_parameters_raw = [] 218 atomic_positions_raw = [] 219 coordinates_raw = find.between(file_path, 'Begin final coordinates', 'End final coordinates', False, -1, False) 220 if coordinates_raw: 221 coordinates_raw = coordinates_raw.splitlines() 222 append_cell = False 223 append_positions = False 224 for line in coordinates_raw: 225 line = line.strip() 226 if cell_parameters_key in line: 227 append_cell = True 228 append_positions = False 229 elif atomic_positions_key in line: 230 append_cell = False 231 append_positions = True 232 elif 'volume' in line: 233 volume = extract.number(line, 'volume') 234 volume_AA = volume * (_BOHR_TO_ANGSTROM ** 3) 235 elif 'density' in line: 236 density = extract.number(line, 'density') 237 if line == '' or line.startswith('!'): 238 continue 239 if append_cell: 240 cell_parameters_raw.append(line) 241 elif append_positions: 242 atomic_positions_raw.append(line) 243 atomic_positions = normalize_card(atomic_positions_raw) 244 if cell_parameters_raw: 245 cell_parameters = normalize_card(cell_parameters_raw) 246 if 'alat' in cell_parameters[0]: 247 alat = extract.number(cell_parameters[0], 'alat') 248 # If not found, at least try to keep the latest iteration 249 else: 250 _nat_raw = find.lines(file_path, 'number of atoms/cell', 1) 251 if _nat_raw: 252 _nat = int(extract.number(_nat_raw[0], 'number of atoms/cell')) 253 atomic_positions_raw = find.lines(file_path, atomic_positions_key, matches=-1, additional=_nat, split=True) 254 if atomic_positions_raw: 255 atomic_positions = normalize_card(atomic_positions_raw) 256 cell_parameters_raw = find.lines(file_path, cell_parameters_key, matches=-1, additional=3, split=True) 257 if cell_parameters_raw: 258 cell_parameters = normalize_card(cell_parameters_raw) 259 if 'alat' in cell_parameters[0]: 260 alat = extract.number(cell_parameters[0], 'alat') 261 262 output = { 263 'Energy' : energy, 264 'Total force' : force, 265 'Total SCF correction' : scf, 266 'Runtime' : time, 267 'JOB DONE' : job_done, 268 'BFGS converged' : bfgs_converged, 269 'BFGS failed' : bfgs_failed, 270 'Maxiter reached' : maxiter_reached, 271 'Error' : error, 272 'Success' : success, 273 'CELL_PARAMETERS out' : cell_parameters, 274 'ATOMIC_POSITIONS out' : atomic_positions, 275 'Alat' : alat, 276 'Volume' : volume, 277 'Volume AA' : volume_AA, 278 'Density' : density, 279 'Pressure' : pressure, 280 'Efermi' : efermi, 281 } 282 283 # Extract lattice parameters A, B, C, celldm(i) and cosines 284 if alat: 285 consts = consts_from_cell_parameters(cell_parameters, alat) 286 else: 287 consts = consts_from_cell_parameters(cell_parameters) 288 consts_out = {key + ' out': value for key, value in consts.items()} 289 output.update(consts_out) 290 291 return output 292 293 294def read_dir( 295 folder=None, 296 in_str:str='.in', 297 out_str:str='.out' 298 ) -> dict: 299 """Takes a `folder` from a QE pw.x calculation, returns a dict with input and output values. 300 301 Input and output files are determined automatically, 302 but must be specified with `in_str` and `out_str` 303 if more than one file ends with `.in` or `.out`. 304 """ 305 folder = file.get_dir(folder) 306 input_file = file.get(folder, in_str) 307 output_file = file.get(folder, out_str) 308 if not input_file and not output_file: 309 return None 310 if input_file: 311 dict_in = read_in(input_file) 312 if not output_file: 313 return dict_in 314 if output_file: 315 dict_out = read_out(output_file) 316 if not input_file: 317 return dict_out 318 # Merge both dictionaries 319 merged_dict = {**dict_in, **dict_out} 320 return merged_dict 321 322 323def read_dirs( 324 folder, 325 in_str:str='.in', 326 out_str:str='.out', 327 include=None, 328 exclude=None, 329 separator='_', 330 type_index=0, 331 id_index=1 332 ) -> None: 333 """Reads recursively QE pw.x calculations from all the subfolders inside the given `directory`. 334 335 Results are saved to CSV files inside the current directory. 336 Input and output files are determined automatically, but must be specified with 337 `in_str` and `out_str` if more than one file ends with `.in` or `.out`. 338 339 Only folders containing all strings in the `include` list will be included. 340 Folders containing any string from the `exclude` list will be ignored. 341 342 To properly group the calculations per type, saving separated CSVs for each calculation type, 343 you can modify `separator` ('_' by default), `type_index` (0) and `id_index` (1). 344 With these default values, a subfolder named './CalculationType_CalculationID_AdditionalText/' 345 will be interpreted as follows: 346 - Calculation type: 'CalculationType' (The output CSV will be named after this) 347 - CalculationID: 'CalculationID' (Stored in the 'ID' column of the resulting dataframe) 348 349 If the detection fails, the subfolder name will be used for the CSV file. 350 """ 351 print(f'Reading all Quantum ESPRESSO calculations from {folder} ...') 352 folders = file.get_list(folder, include=include, exclude=exclude, only_folders=True) 353 if not folders: 354 raise FileNotFoundError('The directory is empty!') 355 # Separate calculations by their title in an array 356 calc_types = [] 357 for f in folders: 358 folder_name = os.path.basename(f) 359 try: 360 calc_name = folder_name.split(separator)[type_index] 361 except: 362 calc_name = folder_name 363 if not calc_name in calc_types: 364 calc_types.append(calc_name) 365 len_folders = len(folders) 366 total_success_counter = 0 367 for calc in calc_types: 368 len_calcs = 0 369 success_counter = 0 370 results = pd.DataFrame() 371 for f in folders: 372 if not calc in f: 373 continue 374 len_calcs += 1 375 folder_name = os.path.basename(f) 376 try: 377 calc_id = folder_name.split(separator)[id_index] 378 except: 379 calc_id = folder_name 380 dictionary = read_dir(f, in_str, out_str) 381 #df = pd.DataFrame.from_dict(dictionary) 382 df = pd.DataFrame({k: [v] for k, v in dictionary.items()}) if dictionary else None 383 if df is None: 384 continue 385 # Join input and output in the same dataframe 386 df.insert(0, 'ID', calc_id) 387 df = df.dropna(axis=1, how='all') 388 results = pd.concat([results, df], axis=0, ignore_index=True) 389 if df['Success'][0]: 390 success_counter += 1 391 total_success_counter += 1 392 results.to_csv(os.path.join(folder, calc+'.csv')) 393 print(f'Saved to CSV: {calc} ({success_counter} successful calculations out of {len_calcs})') 394 print(f'Total successful calculations: {total_success_counter} out of {len_folders}') 395 396 397def set_value( 398 filepath, 399 key:str, 400 value, 401 indent:str=' ', 402 ) -> None: 403 """Replace the `value` of a `key` parameter in an input `filepath`. 404 405 Delete parameters with `value=''`. 406 Remember to include the single quotes `'` on values that use them. 407 408 Updating 'ATOMIC_POSITIONS' updates 'nat' automatically, 409 and updating 'ATOMIC_SPECIES' updates 'ntyp'. 410 411 Optionally change indentation with `indent`, 2 spaces by default. 412 """ 413 key = key.strip() 414 file_path = file.get(filepath) 415 input_old = read_in(file_path) 416 # Present keys uppercase 417 present_keys_upper = [] 418 for present_key in input_old.keys(): 419 present_keys_upper.append(present_key.upper()) 420 # All namelist values 421 pw_values = [] 422 for namelist_values in pw_namelists.values(): 423 pw_values.extend(namelist_values) 424 # All cards 425 pw_cards_upper = [] # Don't forget about these! 426 for card in pw_cards.keys(): 427 pw_cards_upper.append(card.upper()) 428 # Check if it's a namelist 429 if key in pw_values: 430 if key in input_old.keys(): 431 _update_value(filepath, key, value, indent) 432 else: # Write the value from scratch 433 _add_value(filepath, key, value, indent) 434 # Check if it's a card 435 elif key.upper() in pw_cards_upper: 436 if key.upper() in present_keys_upper: 437 _update_card(filepath, key, value, indent) 438 else: 439 _add_card(filepath, key, value, indent) 440 else: 441 raise ValueError(f'Unrecognised key: {key}') 442 return None 443 444 445def set_values( 446 filepath, 447 update:dict, 448 indent:str=' ', 449 ) -> None: 450 """Replace multiple values of an input `filepath` with an `update` dict. 451 452 Calls `set_value` recursively. If `update` is empty, nothig will happen. 453 """ 454 if not update: 455 return None 456 for key, value in update.items(): 457 set_value(filepath=filepath, key=key, value=value, indent=indent) 458 return None 459 460 461def _update_value( 462 filepath, 463 key:str, 464 value, 465 indent:str=' ' 466 ) -> None: 467 """Update the `value` of an existing `key` from a namelist. Called by `set_value()`. 468 469 Optionally change indentation with `indent`, 2 spaces by default. 470 """ 471 key = key.strip() 472 key_uncommented = key 473 key_uncommented = key_uncommented.replace('(', r'\(') 474 key_uncommented = key_uncommented.replace(')', r'\)') 475 key_uncommented = rf'^\s*\b({key_uncommented})\s*=' 476 #key_uncommented = rf'(?!\s*!\s*){key_uncommented}\s*=' # Legacy regex 477 # Convert to int if necessary 478 if key in _pw_int_values: 479 value = int(value) 480 line = f'{indent}{key} = {value}' 481 if value == '': 482 line = '' 483 edit.replace_line(filepath=filepath, key=key_uncommented, text=line, replacements=1, regex=True) 484 _update_other_values(filepath, key, value, indent) 485 return None 486 487 488def _add_value( 489 filepath, 490 key:str, 491 value, 492 indent:str=' ' 493 ) -> None: 494 """Adds a `key` = `value` to a NAMELIST. Called by `set_value()`. 495 496 Optionally change indentation with `indent`, 2 spaces by default. 497 """ 498 if value == '': # No need to delete an unexisting value! 499 return None 500 if key in _pw_int_values: 501 value = int(value) 502 key = key.strip() 503 value = str(value).strip() 504 # Which namelist? 505 parent_namelist = None 506 for namelist in pw_namelists.keys(): 507 if key in pw_namelists[namelist]: 508 parent_namelist = namelist 509 break 510 if not parent_namelist: 511 raise ValueError(f"Key is not valid, '{key}' is not from any NAMELIST!") 512 # Add the parent namelist if it does not exist, then add the value 513 _add_namelist(filepath, parent_namelist) 514 # Convert to int if necessary 515 if value in _pw_int_values: 516 value = int(value) 517 line = f'{indent}{key} = {value}' 518 parent_namelist_regex = rf'(?!\s*!\s*)({parent_namelist}|{parent_namelist.lower()})' 519 edit.insert_under(filepath=filepath, key=parent_namelist_regex, text=line, insertions=1, regex=True) 520 # Update other values if necessary 521 _update_other_values(filepath, key, value, indent) 522 return None 523 524 525def _add_namelist( 526 filepath, 527 namelist:str, 528 ) -> None: 529 """Adds a `namelist` to the `filepath` if not present. Called by `add_value()` if needed.""" 530 namelists = list(pw_namelists.keys()) 531 namelist = namelist.upper().strip() 532 if not namelist in namelists: 533 raise ValueError(f'{namelist} is not a valid namelist! Valid namelists are:\n{namelists}') 534 # Is the namelist already there? 535 namelist_regex = rf'(?!\s*!\s*)({namelist}|{namelist.lower()})' 536 is_present = find.lines(filepath=filepath, key=namelist_regex, regex=True) 537 if is_present: 538 return None 539 # Find where to insert the namelist, from last to first. 540 # We might have to insert it on top of the first CARD found. 541 next_namelist = _all_cards_regex 542 namelists.reverse() 543 # Get the present namelists, plus the target one 544 present_namelists = [] 545 for section in namelists: 546 is_section_present = find.lines(filepath=filepath, key=rf'^\s*\b({section})', regex=True) 547 #is_section_present = find.lines(filepath=filepath, key=rf'(?!\s*!\s*)({section})', regex=True) # Legacy regex 548 if is_section_present or section.upper() == namelist.upper(): 549 present_namelists.append(section) 550 # Get the very next section after the desired one 551 for section in present_namelists: 552 if section == namelist: 553 break 554 next_namelist = rf'^\s*\b({section})(?!\s*=)' 555 #next_namelist = rf'(?!\s*!\s*)({section})(?!\s*=)' 556 # Insert the target namelist on top of it! 557 edit.insert_under(filepath, next_namelist, f'{namelist}\n/\n', 1, -1, True) 558 return None 559 560 561def _update_card( 562 filepath, 563 key:str, 564 value:list, 565 indent:str=' ', 566 ) -> None: 567 """Updates the `value` of a `key` CARD, present in `filepath`. Called by `set_value()`. 568 569 Optionally change indentation with `indent`, 2 spaces by default. 570 """ 571 if value == '': 572 return None 573 value = normalize_card(value, indent) 574 # Replace the CARD value up to the next CARD found 575 key = key.upper().strip() 576 key_uncommented = rf'(?!\s*!\s*)({key}|{key.lower()})' 577 lines = '\n'.join(value) 578 edit.replace_between(filepath=filepath, key1=key_uncommented, key2=_all_cards_regex, text=lines, delete_keys=False, regex=True) 579 # We added the CARD below the previous CARD title, so we remove the previous CARD title 580 edit.replace_line(filepath, key_uncommented, '', 1, 0, 0, True) 581 # We might have to update other values, such as nat or ntyp 582 _update_other_values(filepath, key, value, indent) 583 return None 584 585 586def _add_card( 587 filepath, 588 key:str, 589 value:list, 590 indent:str=' ' 591 ) -> None: 592 """Adds a non-present `key` CARD with a given `value` to the `filepath`. Called by `set_value()`. 593 594 Optionally change indentation with `indent`, 2 spaces by default. 595 """ 596 if value == '': 597 return None 598 value = normalize_card(value, indent) 599 insert_value = value 600 if isinstance(value, list): 601 insert_value = '\n'.join(value) 602 edit.insert_at(filepath, insert_value, -1) 603 _update_other_values(filepath, key, value, indent) 604 return None 605 606 607def _update_other_values( 608 filepath, 609 key:str, 610 value, 611 indent:str=' ' 612 ) -> None: 613 """Updates values that depend on the input value, eg. updates 'nat' when updating ATOMIC_POSITIONS. 614 615 Optionally change indentation with `indent`, 2 spaces by default. 616 """ 617 file_path = file.get(filepath) 618 old_values = read_in(file_path) 619 # Key in upper cases for CARD values 620 key = key.strip().upper() 621 # CELL_PARAMETERS ? 622 if key in ['CELL_PARAMETERS', 'CELL_PARAMETERS out']: 623 if 'angstrom' in value[0] or 'bohr' in value[0]: 624 edit.replace_line(file_path, r'^\s*\b(celldm\(\d\))\s*=', '', 0, 0, 0, True) 625 edit.replace_line(file_path, r'^\s*\b([ABC])\s*=', '', 0, 0, 0, True) 626 edit.replace_line(file_path, r'^\s*\b(cos[AB][BC])\s*=', '', 0, 0, 0, True) 627 #edit.replace_line(file_path, r'(?!\s*!\s*)celldm\(\d\)\s*=', '', 0, 0, 0, True) # Legacy regex 628 #edit.replace_line(file_path, r'(?!\s*!\s*)[ABC]\s*=', '', 0, 0, 0, True) # Legacy regex 629 #edit.replace_line(file_path, r'(?!\s*!\s*)cos[AB][BC]\s*=', '', 0, 0, 0, True) # Legacy regex 630 elif 'alat' in value[0]: 631 alat = extract.number(value[0]) 632 if alat: 633 edit.replace_line(file_path, r'^\s*\b(CELL_PARAMETERS)', 'CELL_PARAMETERS alat', -1, 0, 0, True) 634 edit.replace_line(file_path, r'^\s*\b(celldm\(\d\))\s*=', f'celldm(1) = {alat}', 1, 0, 0, True) 635 #edit.replace_line(file_path, r'(?!\s*!\s*)CELL_PARAMETERS', 'CELL_PARAMETERS alat', -1, 0, 0, True) # Legacy regex 636 #edit.replace_line(file_path, r'(?!\s*!\s*)celldm\(\d\)\s*=', f'celldm(1) = {alat}', 1, 0, 0, True) # Legacy regex 637 return None 638 # ATOMIC_SPECIES ? 639 elif key == 'ATOMIC_SPECIES': 640 old_ntyp = old_values['ntyp'] 641 elements_found = [] 642 for line in value[1:]: 643 element = extract.element(line) 644 if element: 645 if not element in elements_found: 646 elements_found.append(element) 647 new_ntyp = len(elements_found) 648 if old_ntyp != new_ntyp: 649 set_value(filepath, 'ntyp', new_ntyp, indent) 650 return None 651 # ATOMIC_POSITIONS ? 652 elif key in ['ATOMIC_POSITIONS', 'ATOMIC_POSITIONS out']: 653 new_nat = len(value) - 1 654 if old_values['nat'] != new_nat: 655 set_value(filepath, 'nat', new_nat, indent) 656 return None 657 # Key in lower case for NAMELIST values 658 key = key.lower() 659 # Lattice params Angstroms? 660 if key in ['a', 'b', 'c', 'cosab', 'cosac', 'cosbc']: 661 edit.replace_line(file_path, r'^\s*\b(celldm\(\d\))\s*=', '', 0, 0, 0, True) 662 edit.replace_line(file_path, r'^\s*\b(CELL_PARAMETERS)', 'CELL_PARAMETERS alat', -1, 0, 0, True) 663 #edit.replace_line(file_path, r'(?!\s*!\s*)celldm\(\d\)\s*=', '', 0, 0, 0, True) # Legacy regex 664 #edit.replace_line(file_path, r'(?!\s*!\s*)CELL_PARAMETERS', 'CELL_PARAMETERS alat', -1, 0, 0, True) # Legacy regex 665 return None 666 # Lattice params Bohrs ? 667 elif 'celldm' in key: 668 edit.replace_line(file_path, r'^\s*\b([ABC])\s*=', '', 0, 0, 0, True) 669 edit.replace_line(file_path, r'^\s*\b(cos[AB][BC])\s*=', '', 0, 0, 0, True) 670 edit.replace_line(file_path, r'^\s*\b(CELL_PARAMETERS)', 'CELL_PARAMETERS alat', -1, 0, 0, True) 671 #edit.replace_line(file_path, r'(?!\s*!\s*)[ABC]\s*=', '', 0, 0, 0, True) # Legacy regex 672 #edit.replace_line(file_path, r'(?!\s*!\s*)cos[AB][BC]\s*=', '', 0, 0, 0, True) # Legacy regex 673 #edit.replace_line(file_path, r'(?!\s*!\s*)CELL_PARAMETERS', 'CELL_PARAMETERS alat', -1, 0, 0, True) # Legacy regex 674 return None 675 return None 676 677 678def add_atom(filepath, position, indent=' ') -> None: 679 """Adds an atom in a given input `filepath` at a specified `position`. 680 681 Position must be a string or a list, as follows: 682 `"specie:str float float float"` or `[specie:str, float, float, float]`. 683 684 This method updates automatically other related values, 685 such as 'ntyp' when updating ATOMIC_SPECIES, etc. 686 687 Optionally change indentation with `indent`, 2 spaces by default. 688 """ 689 new_atom = position 690 if isinstance(position, list): 691 if not len(position) == 4 or not isinstance(position[0], str): 692 raise ValueError('If your atomic position is a list, it must contain the atom type and the three coords, as in [str, str/float, str/float, str/float]') 693 new_atom = ' '.join(str(x) for x in position) 694 elif not isinstance(position, str): 695 raise ValueError(f'The specified position must be a list of size 4 (atom type and three coordinates) or an equivalent string! Your position was:\n{coords}') 696 # Let's check that our new_atom makes sense 697 atom = extract.element(new_atom) 698 coords = extract.coords(new_atom) 699 if not atom: 700 raise ValueError(f'The specified position does not contain an atom at the beginning! Your position was:\n{position}') 701 if len(coords) < 3: 702 raise ValueError(f'Your position has len(coordinates) < 3, please check it.\nYour position was: {position}\nCoordinates detected: {coords}') 703 if len(coords) > 3: 704 coords = coords[:3] 705 new_atom = f'{atom} {coords[0]} {coords[1]} {coords[2]}' 706 # Get the values from the file 707 values = read_in(filepath) 708 atomic_positions = values['ATOMIC_POSITIONS'] 709 atomic_positions.append(new_atom) 710 # Update ATOMIC_POSITIONS. nat should be updated automatically. 711 set_value(filepath=filepath, key='ATOMIC_POSITIONS', value=atomic_positions, indent=indent) 712 # We might have to update ATOMIC_SPECIES! 713 atomic_species = values['ATOMIC_SPECIES'] 714 is_atom_missing = True 715 for specie in atomic_species[1:]: 716 if atom == extract.element(specie): 717 is_atom_missing = False 718 break 719 if is_atom_missing: # Update ATOMIC_SPECIES. ntyp should be updated automatically. 720 mass = periodictable.elements.symbol(atom).mass 721 atomic_species.append(f'{atom} {mass} {atom}.upf') 722 set_value(filepath=filepath, key='ATOMIC_SPECIES', value=atomic_species, indent=indent) 723 return None 724 725 726def get_atom( 727 filepath:str, 728 position:list, 729 precision:int=3, 730 return_anyway:bool=False, 731 ) -> str: 732 """Takes the approximate `position` of an atom, and returns the full line from the input `filepath`. 733 734 It compares the atomic positions rounded up to the specified `precision` decimals. 735 If `return_anyway = True`, ignores errors and returns an empty string. 736 """ 737 # Check that the coordinates are valid 738 if isinstance(position, str): 739 coordinates = extract.coords(position) 740 elif isinstance(position, list): 741 coordinates = position 742 if len(position) == 1 and isinstance(position[0], str): # In case someone like me introduces ['x, y, z'] 743 coordinates = extract.coords(position[0]) 744 else: 745 if return_anyway: 746 return '' 747 raise ValueError(f'The atomic position must be a list or a string! Yours was:\n{position}\nDetected coordinates:\n{coordinates}') 748 if len(coordinates) < 3: 749 if return_anyway: 750 return '' 751 raise ValueError(f'Atomic position has less that 3 coordinates! Yours had len={len(coordinates)}:\n{position}\nDetected coordinates:\n{coordinates}') 752 if len(coordinates) > 3: 753 coordinates = coordinates[:3] 754 # Round the input position up to the specified precision 755 coordinates_rounded = [] 756 for coordinate in coordinates: 757 coordinates_rounded.append(round(coordinate, precision)) 758 # Compare the rounded coordinates with the atomic positions 759 content = read_in(filepath) 760 if not 'ATOMIC_POSITIONS' in content.keys(): 761 if return_anyway: 762 return '' 763 raise ValueError(f'ATOMIC_POSITIONS missing in {filepath}') 764 atomic_positions = content['ATOMIC_POSITIONS'][1:] # Remove header 765 matched_pos = None 766 matched_index = None 767 for i, atomic_position in enumerate(atomic_positions): 768 coords = extract.coords(atomic_position) 769 coords_rounded = [] 770 for pos in coords: 771 coords_rounded.append(round(pos, precision)) 772 if coordinates_rounded == coords_rounded: 773 if matched_pos: # There was a previous match! 774 if return_anyway: 775 return '' 776 raise ValueError(f'More than one matching position found! Try again with more precision.\nSearched coordinates: {coordinates_rounded}') 777 matched_pos = atomic_position 778 matched_index = i 779 if not matched_pos: 780 if return_anyway: 781 return '' 782 raise ValueError(f'No matching position found! Try again with a less tight precision parameter.\nSearched coordinates: {coordinates_rounded}') 783 # We must get the literal line, not the normalized one! 784 atomic_positions_uncommented = rf'^\s*\b(ATOMIC_POSITIONS|atomic_positions)' 785 #atomic_positions_uncommented = rf'(?!\s*!\s*)(ATOMIC_POSITIONS|atomic_positions)' # Legacy regex 786 atomic_positions_lines = find.between(filepath=filepath, key1=atomic_positions_uncommented, key2=_all_cards_regex, include_keys=False, match=-1, regex=True) 787 # Remove commented or empty lines 788 atomic_positions_lines = atomic_positions_lines.splitlines() 789 atomic_positions_cleaned = [] 790 for line in atomic_positions_lines: 791 if line == '' or line.startswith('!') or line.startswith('#'): 792 continue 793 atomic_positions_cleaned.append(line) 794 matched_line = atomic_positions_cleaned[matched_index] 795 return matched_line.strip() 796 797 798def normalize_card(card:list, indent:str='') -> list: 799 """Take a matched card, and return it in a normalised format. 800 801 Optionally change indentation with `indent`, 0 spaces by default. 802 """ 803 # Make sure it is a list! 804 if isinstance(card, str): 805 card = card.split('\n') 806 if not isinstance(card, list): 807 raise ValueError(f"Card must be a string or a list of strings! Yours was:\n{card}") 808 # Keep the header 809 cleaned_content = [card[0].strip()] 810 for line in card[1:]: 811 line = line.strip() 812 if line == '' or line.startswith('!') or line.startswith('#'): 813 continue 814 elif any(key in line.upper() for key in pw_cards.keys()): 815 break 816 items = line.split() 817 cleaned_line = f'{indent}{items[0].strip()}' # Add the specified intent at the start of the line 818 for item in items[1:]: 819 item = item.strip() 820 cleaned_line = cleaned_line + ' ' + item # Three spaces between elements 821 cleaned_content.append(cleaned_line) 822 # Perform extra normalization for some CARDs 823 if any(key in cleaned_content[0] for key in ['cell_parameters', 'CELL_PARAMETERS']): 824 cleaned_content = _normalize_cell_parameters(cleaned_content, indent) 825 elif any(key in cleaned_content[0] for key in ['atomic_positions', 'ATOMIC_POSITIONS']): 826 cleaned_content = _normalize_atomic_positions(cleaned_content, indent) 827 elif any(key in cleaned_content[0] for key in ['atomic_species', 'ATOMIC_SPECIES']): 828 cleaned_content = _normalize_atomic_species(cleaned_content, indent) 829 elif any(key in cleaned_content[0] for key in ['k_points', 'K_POINTS']): 830 cleaned_content = _normalize_k_points(cleaned_content) 831 return cleaned_content 832 833 834def _normalize_cell_parameters(card, indent:str='') -> list: 835 """Performs extra formatting to a previously cleaned CELL_PARAMETERS `card`. 836 837 Optionally change indentation with `indent`, 0 spaces by default. 838 """ 839 if card == None: 840 return None 841 cell_parameters = [card[0].strip()] 842 for line in card[1:]: 843 coords = extract.coords(line) 844 if len(coords) < 3: 845 raise ValueError(f'Each CELL_PARAMETER must have three coordinates! Yours was:\n{card}\nDetected coordinates were:\n{coords}') 846 if len(coords) > 3: # This should never happen but who knows... 847 coords = coords[:3] 848 new_line = f"{indent}{coords[0]:.15f} {coords[1]:.15f} {coords[2]:.15f}" 849 cell_parameters.append(new_line) 850 # Normalise the header 851 if 'angstrom' in cell_parameters[0]: 852 cell_parameters[0] = 'CELL_PARAMETERS angstrom' 853 elif 'bohr' in cell_parameters[0]: 854 cell_parameters[0] = 'CELL_PARAMETERS bohr' 855 elif 'alat' in cell_parameters[0]: 856 alat = extract.number(cell_parameters[0], 'alat') 857 if alat: 858 cell_parameters[0] = f'CELL_PARAMETERS alat= {alat}' 859 else: 860 cell_parameters[0] = 'CELL_PARAMETERS alat' 861 else: 862 raise ValueError(f'CELL_PARAMETERS must contain angstrom, bohr or alat! Yours was:\n{cell_parameters}') 863 return cell_parameters 864 865 866def _normalize_atomic_positions(card, indent:str='') -> list: 867 """Performs extra formatting to a previously cleaned ATOMIC_POSITIONS `card`. 868 869 Optionally change indentation with `indent`, 2 spaces by default. 870 """ 871 if card == None: 872 return None 873 atomic_positions = [card[0].strip()] 874 for line in card[1:]: 875 line = line.strip() 876 atom = extract.element(line) 877 if not atom: 878 raise ValueError(f'Atoms must be defined as the atom (H, He, Na...) or the isotope (H2, He4...)! Yours was:\n{line}') 879 if len(atom) == 1 and len(indent) > 0: # Align the line 880 atom = indent + ' ' + atom 881 else: 882 atom = indent + atom 883 coords = extract.coords(line) 884 if len(coords) < 3: 885 raise ValueError(f'Each ATOMIC_POSITION must have at least three coordinates! Yours contained the line:\n{line}\nDetected coordinates were:\n{coords}') 886 if len(coords) > 6: # Including optional parameters 887 coords = coords[:6] 888 new_line = f"{atom}" 889 for coord in coords: 890 new_line = f"{new_line} {coord:.15f}d0" # Double float precission 891 atomic_positions.append(new_line) 892 if 'alat' in atomic_positions[0]: 893 atomic_positions[0] = 'ATOMIC_POSITIONS alat' 894 elif 'bohr' in atomic_positions[0]: 895 atomic_positions[0] = 'ATOMIC_POSITIONS bohr' 896 elif 'angstrom' in atomic_positions[0]: 897 atomic_positions[0] = 'ATOMIC_POSITIONS angstrom' 898 elif 'crystal_sg' in atomic_positions[0]: 899 atomic_positions[0] = 'ATOMIC_POSITIONS crystal_sg' 900 elif 'crystal' in atomic_positions[0]: 901 atomic_positions[0] = 'ATOMIC_POSITIONS crystal' 902 else: 903 raise ValueError(f"ATOMIC_POSITIONS[0] must contain alat, bohr, angstrom, crystal or crystal_sg. Yours was:\n{atomic_positions[0]}") 904 return atomic_positions 905 906 907def _normalize_atomic_species(card, indent:str='') -> list: 908 """Performs extra formatting to a previously cleaned ATOMIC_SPECIES `card`. 909 910 Optionally change indentation with `indent`, 2 spaces by default. 911 """ 912 if card == None: 913 return None 914 atomic_species = [card[0].strip()] 915 for line in card[1:]: 916 line = line.strip() 917 atom = extract.element(line) 918 if not atom: 919 raise ValueError(f'Atom must be specified at the beginning! Your line was:\n{line}') 920 mass_list = extract.coords(line) 921 if len(mass_list) == 1: 922 mass = mass_list[0] 923 else: # Is the mass missing? 924 raise ValueError(f'Mass is not properly specified: {line}') 925 # Get the pseudo in the third position 926 line_split = line.split() 927 if len(line_split) < 3: 928 raise ValueError(f'Does the ATOMIC_SPECIES contain the pseudopotential? Your line was:\n{line}') 929 pseudopotential = line_split[2] 930 full_line = f"{indent}{atom} {mass} {pseudopotential}" 931 atomic_species.append(full_line) 932 return atomic_species 933 934 935def _normalize_k_points(card, indent:str='') -> list: 936 """Performs extra formatting to a previously cleaned K_POINTS `card`. 937 938 Optionally change indentation with `indent`, 2 spaces by default. 939 """ 940 if card == None: 941 return None 942 k_points = [card[0].strip()] 943 # Find the biggest str to align the columns to the left 944 longest_str = 0 945 for line in card[1:]: 946 points = line.split() 947 for point in points: 948 point.strip() 949 if len(point) > longest_str: 950 longest_str = len(point) 951 # Format the points 952 for line in card[1:]: 953 points = line.split() 954 new_line = '' 955 for point in points: 956 s = point.strip() 957 s = s.ljust(longest_str) 958 new_line = f'{new_line}{s} ' 959 new_line = indent + new_line.strip() 960 k_points.append(new_line) 961 if 'automatic' in k_points[0]: 962 k_points[0] = 'K_POINTS automatic' 963 elif 'gamma' in k_points[0]: 964 k_points[0] = 'K_POINTS gamma' 965 elif 'tpiba_b' in k_points[0]: 966 k_points[0] = 'K_POINTS tpiba_b' 967 elif 'tpiba_c' in k_points[0]: 968 k_points[0] = 'K_POINTS tpiba_c' 969 elif 'tpiba' in k_points[0]: 970 k_points[0] = 'K_POINTS tpiba' 971 elif 'crystal_b' in k_points[0]: 972 k_points[0] = 'K_POINTS crystal_b' 973 elif 'crystal_c' in k_points[0]: 974 k_points[0] = 'K_POINTS crystal_c' 975 elif 'crystal' in k_points[0]: 976 k_points[0] = 'K_POINTS crystal' 977 else: 978 raise ValueError(f'K_POINTS must be tpiba, automatic, crystal, gamma, tpiba_b, crystal_b, tpiba_c, or crystal_c. Yours was:\n{k_points}') 979 return k_points 980 981 982def count_elements(atomic_positions) -> dict: 983 """Takes ATOMIC_POSITIONS, returns a dict as `{element : number of atoms}`""" 984 if isinstance(atomic_positions, str): 985 atomic_positions.split() 986 if not isinstance(atomic_positions, list): 987 raise ValueError(f'To count the elements, atomic_positions must be a list or a str! Yours was:\n{atomic_positions}') 988 elements = {} 989 if any(header in atomic_positions[0] for header in ['ATOMIC_POSITIONS', 'atomic_positions']): 990 atomic_positions = atomic_positions[1:] 991 for line in atomic_positions: 992 element = extract.element(line) 993 if not element: 994 continue 995 if element in elements.keys(): 996 elements[element] = elements[element] + 1 997 else: 998 elements[element] = 1 999 return elements 1000 1001 1002def consts_from_cell_parameters( 1003 cell_parameters: list, 1004 AA: float | None = None, 1005 bohr: float | None = None, 1006 tol: float = 1e-4, 1007 ) -> dict: 1008 """Calculates the lattice parameters from CELL_PARAMETERS and determines Bravais lattice type. 1009 1010 Takes the normalized `cell_parameters` card, 1011 and optional `AA` (angstrom) or `bohr` values for the alat parameter. 1012 Automatically detects the ibrav Bravais lattice type from the cell vectors, 1013 with a default tolerance of `tol=1e-4`. 1014 1015 Returns a dictionary with the fundamental lattice parameters: 1016 `'A'`, `'B'`, `'C'`, (angstrom) `'alpha'`, `'beta'`, `'gamma'` (degrees), 1017 `'cosBC'`, `'cosAC'`, `'cosAB'`, `'Volume cell'` (AA^3), 1018 `'ibrav'`, `'ibrav name'`. 1019 """ 1020 # Check that the input is valid 1021 if not cell_parameters: 1022 return { 1023 'A': None, 'B': None, 'C': None, 1024 'alpha': None, 'beta': None, 'gamma': None, 1025 'cosBC': None, 'cosAC': None, 'cosAB': None, 1026 'Volume cell': None 1027 } 1028 if len(cell_parameters) < 4: 1029 raise ValueError("Input list is too short or empty for cell parameters.") 1030 # Check that only one of AA or bohr is provided 1031 if AA is not None and bohr is not None: 1032 raise ValueError("Only one of 'AA' or 'bohr' arguments can be provided, not both.") 1033 1034 header = cell_parameters[0].lower() 1035 scaling_factor = 1.0 1036 # Determine scaling factor based on the card header 1037 if 'bohr' in header: # Convert from Bohr to Angstrom for all calculations 1038 scaling_factor = _BOHR_TO_ANGSTROM 1039 elif 'angstrom' in header or 'ang' in header: # No scaling needed 1040 pass 1041 elif 'alat' in header: # Extract alat from header or use provided AA/bohr 1042 alat_from_header = extract.number(cell_parameters[0]) 1043 if alat_from_header is not None: # Convert from bohr to angstrom 1044 scaling_factor = alat_from_header * _BOHR_TO_ANGSTROM 1045 elif AA is not None: 1046 if AA <= tol: 1047 raise ValueError(f"CELL_PARAMETERS card is 'alat' but AA = {AA} < {tol}.") 1048 scaling_factor = AA # Use AA directly in Angstrom 1049 elif bohr is not None: 1050 if bohr <= tol: 1051 raise ValueError(f"CELL_PARAMETERS card is 'alat' but bohr = {bohr} < {tol}.") 1052 scaling_factor = bohr * _BOHR_TO_ANGSTROM # Convert Bohr to Angstrom 1053 else: 1054 raise ValueError("CELL_PARAMETERS card is 'alat' but no value could be determined.") 1055 else: 1056 raise ValueError("CELL_PARAMETERS header should specify units in bohr, angstom or alat.") 1057 1058 # Extract and scale vectors 1059 raw_vectors = [] 1060 for line in cell_parameters[1:4]: 1061 coords_list = extract.coords(line) 1062 if len(coords_list) != 3: 1063 raise ValueError(f"CELL_PARAMETERS should only have 3 components per line:\n {coords_list}") 1064 raw_vectors.append(coords_list) 1065 1066 # Convert to NumPy array for 3x3 matrix operations 1067 M = np.array(raw_vectors) 1068 M *= scaling_factor # Now all vectors are in Angstrom 1069 1070 # Scaled vectors 1071 a_vec, b_vec, c_vec = M[0], M[1], M[2] 1072 1073 # Lengths (in Angstrom) 1074 a_len = np.linalg.norm(a_vec) 1075 b_len = np.linalg.norm(b_vec) 1076 c_len = np.linalg.norm(c_vec) 1077 1078 # Check for zero length vectors 1079 if a_len <= tol or b_len <= tol or c_len <= tol: 1080 raise ZeroDivisionError("One or more cell vectors have near-zero length after scaling.") 1081 1082 # Cosines 1083 cosBC = np.dot(b_vec, c_vec) / (b_len * c_len) 1084 cosAC = np.dot(a_vec, c_vec) / (a_len * c_len) 1085 cosAB = np.dot(a_vec, b_vec) / (a_len * b_len) 1086 1087 # Angles in degrees from the cosines 1088 # Using np.clip to avoid numerical issues with arccos 1089 alpha_deg = np.degrees(np.arccos(np.clip(cosBC, -1.0, 1.0))) 1090 beta_deg = np.degrees(np.arccos(np.clip(cosAC, -1.0, 1.0))) 1091 gamma_deg = np.degrees(np.arccos(np.clip(cosAB, -1.0, 1.0))) 1092 1093 consts = { 1094 'A': float(a_len), 1095 'B': float(b_len), 1096 'C': float(c_len), 1097 'alpha': float(alpha_deg), 1098 'beta': float(beta_deg), 1099 'gamma': float(gamma_deg), 1100 'cosBC': float(cosBC), 1101 'cosAC': float(cosAC), 1102 'cosAB': float(cosAB), 1103 } 1104 1105 # Additional values for ibrav detection 1106 temp = { 1107 'a_vec': a_vec.tolist(), 1108 'b_vec': b_vec.tolist(), 1109 'c_vec': c_vec.tolist(), 1110 'a_len': float(a_len), 1111 'b_len': float(b_len), 1112 'c_len': float(c_len), 1113 } 1114 consts_temp = deepcopy(consts) 1115 consts_temp.update(temp) 1116 ibrav = _ibrav_from_consts(consts_temp, tol) 1117 consts.update(ibrav) 1118 return consts 1119 1120 1121def _ibrav_from_consts( 1122 lattice_params: dict, 1123 tol: float = 1e-4 1124 ) -> dict: 1125 """Determine the Bravais lattice type (ibrav) from lattice constants and vectors. 1126 1127 Analyzes lattice parameters and vectors to determine 1128 the appropriate Bravais lattice type according to 1129 [Quantum ESPRESSO's ibrav system](https://www.quantum-espresso.org/Doc/INPUT_PW.html#ibrav). 1130 Uses vector pattern matching to distinguish between primitive and centered lattices. 1131 """ 1132 # Input values 1133 A = lattice_params['A'] 1134 B = lattice_params['B'] 1135 C = lattice_params['C'] 1136 alpha = lattice_params['alpha'] 1137 beta = lattice_params['beta'] 1138 gamma = lattice_params['gamma'] 1139 cosBC = lattice_params['cosBC'] 1140 cosAC = lattice_params['cosAC'] 1141 cosAB = lattice_params['cosAB'] 1142 a_vec = np.array(lattice_params['a_vec']) 1143 b_vec = np.array(lattice_params['b_vec']) 1144 c_vec = np.array(lattice_params['c_vec']) 1145 a_len = lattice_params['a_len'] 1146 1147 # Helper functions for comparisons 1148 def eq(x, y): 1149 return abs(x - y) < tol 1150 def eq_angle(angle, target): 1151 return abs(angle - target) < tol or abs(angle - (180 - target)) < tol 1152 def eq_vec(v1, v2): 1153 """Compare vectors allowing for sign changes and normalization.""" 1154 v1_norm = v1 / np.linalg.norm(v1) if np.linalg.norm(v1) > 0 else v1 1155 v2_norm = v2 / np.linalg.norm(v2) if np.linalg.norm(v2) > 0 else v2 1156 return (np.allclose(v1_norm, v2_norm, atol=tol) or 1157 np.allclose(v1_norm, -v2_norm, atol=tol)) 1158 def matches_pattern(vectors, patterns): 1159 """Check if vectors match any of the given patterns.""" 1160 for pattern in patterns: 1161 # Try all permutations of the vectors 1162 for perm in [(0,1,2), (0,2,1), (1,0,2), (1,2,0), (2,0,1), (2,1,0)]: 1163 matched = True 1164 for i in range(3): 1165 if not eq_vec(vectors[perm[i]], pattern[i]): 1166 matched = False 1167 break 1168 if matched: 1169 return True 1170 return False 1171 1172 # Vectors in alat units for pattern matching 1173 alat = a_len 1174 vectors_alat = [a_vec / alat, b_vec / alat, c_vec / alat] 1175 # Ratios 1176 b_over_a = B / A 1177 c_over_a = C / A 1178 1179 ######## 1180 # Cubic 1181 ######## 1182 if (eq(A, B) and eq(B, C) and 1183 eq_angle(alpha, 90) and eq_angle(beta, 90) and eq_angle(gamma, 90)): 1184 sc_patterns = [ # Cubic P (sc) - ibrav=1 1185 [np.array([1, 0, 0]), np.array([0, 1, 0]), np.array([0, 0, 1])]] 1186 fcc_patterns = [ # Cubic F (fcc) - ibrav=2 1187 [np.array([-0.5, 0, 0.5]), np.array([0, 0.5, 0.5]), np.array([-0.5, 0.5, 0])], 1188 [np.array([0.5, 0.5, 0]), np.array([0.5, 0, 0.5]), np.array([0, 0.5, 0.5])]] 1189 bcc_patterns = [ # Cubic I (bcc) - ibrav=3 1190 [np.array([0.5, 0.5, 0.5]), np.array([-0.5, 0.5, 0.5]), np.array([-0.5, -0.5, 0.5])]] 1191 bcc_alt_patterns = [ # Cubic I (bcc) alternate - ibrav=-3 1192 [np.array([-0.5, 0.5, 0.5]), np.array([0.5, -0.5, 0.5]), np.array([0.5, 0.5, -0.5])]] 1193 if matches_pattern(vectors_alat, fcc_patterns): 1194 return {'ibrav': 2, 'ibrav name': "cubic F (fcc)"} 1195 elif matches_pattern(vectors_alat, bcc_patterns): 1196 return {'ibrav': 3, 'ibrav name': "cubic I (bcc)"} 1197 elif matches_pattern(vectors_alat, bcc_alt_patterns): 1198 return {'ibrav': -3, 'ibrav name': "cubic I (bcc), symmetric axis"} 1199 elif matches_pattern(vectors_alat, sc_patterns): 1200 return {'ibrav': 1, 'ibrav name': "cubic P (sc)"} 1201 else: # Suspicious, but let's default to simple cubic 1202 return {'ibrav': 1, 'ibrav name': "cubic P (sc) (default)"} 1203 1204 ############# 1205 # Tetragonal 1206 ############# 1207 if (eq(A, B) and not eq(A, C) and 1208 eq_angle(alpha, 90) and eq_angle(beta, 90) and eq_angle(gamma, 90)): 1209 st_patterns = [ # Tetragonal P (st) - ibrav=6 1210 [np.array([1, 0, 0]), 1211 np.array([0, 1, 0]), 1212 np.array([0, 0, c_over_a])]] 1213 bct_patterns = [ # Tetragonal I (bct) - ibrav=7 - FIXED pattern 1214 [np.array([0.5, -0.5, 0.5*c_over_a]), 1215 np.array([0.5, 0.5, 0.5*c_over_a]), 1216 np.array([-0.5, -0.5, 0.5*c_over_a])]] 1217 if matches_pattern(vectors_alat, bct_patterns): 1218 return {'ibrav': 7, 'ibrav name': "tetragonal I (bct)"} 1219 elif matches_pattern(vectors_alat, st_patterns): 1220 return {'ibrav': 6, 'ibrav name': "tetragonal P (st)"} 1221 else: # Suspicious but let's go default 1222 return {'ibrav': 6, 'ibrav name': "tetragonal P (st) (default)"} 1223 1224 ############### 1225 # Orthorhombic 1226 ############### 1227 if (not eq(A, B) and not eq(B, C) and not eq(A, C) and 1228 eq_angle(alpha, 90) and eq_angle(beta, 90) and eq_angle(gamma, 90)): 1229 op_patterns = [ # Orthorhombic P - ibrav=8 1230 [np.array([1, 0, 0]), 1231 np.array([0, b_over_a, 0]), 1232 np.array([0, 0, c_over_a])]] 1233 bco_patterns = [ # Orthorhombic base-centered - ibrav=9 1234 [np.array([0.5, 0.5*b_over_a, 0]), 1235 np.array([-0.5, 0.5*b_over_a, 0]), 1236 np.array([0, 0, c_over_a])]] 1237 bco_alt_patterns = [ # Orthorhombic base-centered alternate - ibrav=-9 1238 [np.array([0.5, -0.5*b_over_a, 0]), 1239 np.array([0.5, 0.5*b_over_a, 0]), 1240 np.array([0, 0, c_over_a])]] 1241 bcoA_patterns = [ # Orthorhombic one-face base-centered A-type - ibrav=91 1242 [np.array([1, 0, 0]), 1243 np.array([0, 0.5*b_over_a, -0.5*c_over_a]), 1244 np.array([0, 0.5*b_over_a, 0.5*c_over_a])]] 1245 of_patterns = [ # Orthorhombic face-centered - ibrav=10 1246 [np.array([0.5, 0, 0.5*c_over_a]), 1247 np.array([0.5, 0.5*b_over_a, 0]), 1248 np.array([0, 0.5*b_over_a, 0.5*c_over_a])]] 1249 oi_patterns = [ # Orthorhombic body-centered - ibrav=11 1250 [np.array([0.5, 0.5*b_over_a, 0.5*c_over_a]), 1251 np.array([-0.5, 0.5*b_over_a, 0.5*c_over_a]), 1252 np.array([-0.5, -0.5*b_over_a, 0.5*c_over_a])]] 1253 if matches_pattern(vectors_alat, bco_patterns): 1254 return {'ibrav': 9, 'ibrav name': "orthorhombic base-centered (bco)"} 1255 elif matches_pattern(vectors_alat, bco_alt_patterns): 1256 return {'ibrav': -9, 'ibrav name': "orthorhombic base-centered (bco) alternate"} 1257 elif matches_pattern(vectors_alat, bcoA_patterns): 1258 return {'ibrav': 91, 'ibrav name': "orthorhombic one-face base-centered A-type"} 1259 elif matches_pattern(vectors_alat, of_patterns): 1260 return {'ibrav': 10, 'ibrav name': "orthorhombic face-centered"} 1261 elif matches_pattern(vectors_alat, oi_patterns): 1262 return {'ibrav': 11, 'ibrav name': "orthorhombic body-centered"} 1263 elif matches_pattern(vectors_alat, op_patterns): 1264 return {'ibrav': 8, 'ibrav name': "orthorhombic P"} 1265 else: # Suspicious but let's go default 1266 return {'ibrav': 8, 'ibrav name': "orthorhombic P (default)"} 1267 1268 ################################# 1269 # Hexagonal/trigonal P (ibrav=4) 1270 ################################# 1271 if eq(A, B) and eq_angle(alpha, 90) and eq_angle(beta, 90) and eq_angle(gamma, 120): 1272 hex_patterns = [ # Hexagonal pattern 1273 [np.array([1, 0, 0]), 1274 np.array([-0.5, 0.5*np.sqrt(3), 0]), 1275 np.array([0, 0, c_over_a])]] 1276 if matches_pattern(vectors_alat, hex_patterns): 1277 return {'ibrav': 4, 'ibrav name': "hexagonal/trigonal P"} 1278 else: # Suspicious but let's go default 1279 return {'ibrav': 4, 'ibrav name': "hexagonal/trigonal P (default)"} 1280 1281 ############################################# 1282 # Trigonal R (rhombohedral) - ibrav=5 and -5 1283 ############################################# 1284 # More restrictive condition: all sides equal, all angles equal, but not 90° nor 120° 1285 if (eq(A, B) and eq(B, C) and 1286 eq_angle(alpha, beta) and eq_angle(beta, gamma) and 1287 not eq_angle(alpha, 90) and not eq_angle(alpha, 120)): 1288 # Calculate tx, ty, tz for trigonal R 1289 tx = np.sqrt((1 - cosAB) / 2) 1290 ty = np.sqrt((1 - cosAB) / 6) 1291 tz = np.sqrt((1 + 2 * cosAB) / 3) 1292 trigonalR_patterns = [ # Trigonal R, 3fold axis c - ibrav=5 1293 [np.array([tx, -ty, tz]), np.array([0, 2*ty, tz]), np.array([-tx, -ty, tz])]] 1294 # Trigonal R, 3fold axis <111> - ibrav=-5 1295 a_prime = 1 / np.sqrt(3) 1296 u = tz - 2 * np.sqrt(2) * ty 1297 v = tz + np.sqrt(2) * ty 1298 trigonalR_alt_patterns = [ 1299 [np.array([a_prime*u, a_prime*v, a_prime*v]), 1300 np.array([a_prime*v, a_prime*u, a_prime*v]), 1301 np.array([a_prime*v, a_prime*v, a_prime*u])]] 1302 if matches_pattern(vectors_alat, trigonalR_alt_patterns): 1303 return {'ibrav': -5, 'ibrav name': "trigonal R (3fold axis <111>)"} 1304 elif matches_pattern(vectors_alat, trigonalR_patterns): 1305 return {'ibrav': 5, 'ibrav name': "trigonal R (3fold axis c)"} 1306 else: # Suspicious but let's default 1307 return {'ibrav': 5, 'ibrav name': "trigonal R (3fold axis c) (default)"} 1308 1309 ############# 1310 # Monoclinic 1311 ############# 1312 if (eq_angle(alpha, 90) and eq_angle(beta, 90) and not eq_angle(gamma, 90)): 1313 monoclinicPc_patterns = [ # Monoclinic P (unique axis c) - ibrav=12 1314 [np.array([1, 0, 0]), 1315 np.array([b_over_a*cosAB, b_over_a*np.sqrt(1-cosAB**2), 0]), 1316 np.array([0, 0, c_over_a])]] 1317 monoclinicBc_patterns = [ # Monoclinic base-centered (unique axis c) - ibrav=13 - FIXED 1318 [np.array([0.5, 0, -0.5*c_over_a]), 1319 np.array([b_over_a*cosAB, b_over_a*np.sqrt(1-cosAB**2), 0]), 1320 np.array([0.5, 0, 0.5*c_over_a])]] 1321 if matches_pattern(vectors_alat, monoclinicBc_patterns): 1322 return {'ibrav': 13, 'ibrav name': "monoclinic base-centered (unique axis c)"} 1323 elif matches_pattern(vectors_alat, monoclinicPc_patterns): 1324 return {'ibrav': 12, 'ibrav name': "monoclinic P (unique axis c)"} 1325 else: # Sus... but ok. 1326 return {'ibrav': 12, 'ibrav name': "monoclinic P (unique axis c) (default)"} 1327 if (eq_angle(alpha, 90) and not eq_angle(beta, 90) and eq_angle(gamma, 90)): 1328 monoclinicPb_patterns = [ # Monoclinic P (unique axis b) - ibrav=-12 1329 [np.array([1, 0, 0]), 1330 np.array([0, b_over_a, 0]), 1331 np.array([c_over_a*cosAC, 0, c_over_a*np.sqrt(1-cosAC**2)])]] 1332 monoclinicBb_patterns = [ # Monoclinic base-centered (unique axis b) - ibrav=-13 - FIXED 1333 [np.array([0.5, 0.5*b_over_a, 0]), 1334 np.array([-0.5, 0.5*b_over_a, 0]), 1335 np.array([c_over_a*cosAC, 0, c_over_a*np.sqrt(1-cosAC**2)])]] 1336 if matches_pattern(vectors_alat, monoclinicBb_patterns): 1337 return {'ibrav': -13, 'ibrav name': "monoclinic base-centered (unique axis b)"} 1338 elif matches_pattern(vectors_alat, monoclinicPb_patterns): 1339 return {'ibrav': -12, 'ibrav name': "monoclinic P (unique axis b)"} 1340 else: # Sus... 1341 return {'ibrav': -12, 'ibrav name': "monoclinic P (unique axis b) (default)"} 1342 1343 ############ 1344 # Triclinic 1345 ############ 1346 triclinic_patterns = [ # Triclinic - ibrav=14 1347 [np.array([1, 0, 0]), 1348 np.array([b_over_a*cosAB, b_over_a*np.sqrt(1-cosAB**2), 0]), 1349 np.array([c_over_a*cosAC, 1350 c_over_a*(cosBC-cosAC*cosAB)/np.sqrt(1-cosAB**2), 1351 c_over_a*np.sqrt(1+2*cosBC*cosAC*cosAB-cosBC**2-cosAC**2-cosAB**2)/np.sqrt(1-cosAB**2)])]] 1352 if matches_pattern(vectors_alat, triclinic_patterns): 1353 return {'ibrav': 14, 'ibrav name': "triclinic"} 1354 else: # We couldn't figure it out :/ 1355 return {'ibrav': 0, 'ibrav name': "free (default)"} 1356 1357 1358def resume( 1359 folder=None, 1360 in_str:str='.in', 1361 out_str:str='.out', 1362 ) -> None: 1363 """Update an input file with the atomic coordinates of an output file. 1364 1365 This can be used to quickly resume an unfinished or interrupted calculation. 1366 1367 Note that for the moment this is only expected to work for `ibrav=0` calculations. 1368 1369 Takes a `folder` from a QE pw.x calculation, CWD if empty. 1370 Input and output files are determined automatically, 1371 but must be specified with `in_str` and `out_str` 1372 if more than one file ends with `.in` or `.out`. 1373 1374 Old input and output files will be renamed automatically. 1375 """ 1376 folder = file.get_dir(folder) 1377 exclude = ['resumed', 'slurm-'] 1378 input_file = file.get(folder, include=in_str, exclude=exclude) 1379 output_file = file.get(folder, out_str, exclude=exclude) 1380 if not input_file or not output_file: 1381 raise FileNotFoundError('Missing input or output file') 1382 # Check that ibrav == 0 1383 dict_in = read_in(input_file) 1384 if dict_in['ibrav'] != 0: # TODO: make it work for non-ibrav=0 calculations 1385 raise NotImplementedError('Currently only ibrav=0 calculations can be resumed automatically!') 1386 # Get the new values from the output file 1387 dict_out = read_out(output_file) 1388 atomic_positions = dict_out.get('ATOMIC_POSITIONS out') 1389 cell_parameters = dict_out.get('CELL_PARAMETERS out') 1390 if not atomic_positions: 1391 raise ValueError(f'Missing atomic positions in output file {output_file}') 1392 # Backup old files 1393 backup_in = file.backup(input_file, keep=True, label='resumed') 1394 backup_out = file.backup(output_file, keep=False, label='resumed') 1395 # Update input file 1396 set_value(input_file, 'ATOMIC_POSITIONS', atomic_positions) 1397 if cell_parameters: 1398 set_value(input_file, 'CELL_PARAMETERS', cell_parameters) 1399 print(f'Updated {input_file} from previous output, ready to resume!') 1400 print('Previous input and output files are backuped at:') 1401 print(f' {backup_in}') 1402 print(f' {backup_out}') 1403 return None 1404 1405 1406def scf_from_relax( 1407 folder:str=None, 1408 relax_in:str='relax.in', 1409 relax_out:str='relax.out', 1410 update:dict=None, 1411 indent:str=' ', 1412 ) -> None: 1413 """Create a Quantum ESPRESSO `scf.in` file from a previous relax calculation. 1414 1415 If no `folder` is provided, the current working directory is used. 1416 The `relax_in` and `relax_out` files are `relax.in` and `relax.out` by default. 1417 Update specific values (such as convergence values) with an `update` dictionaty. 1418 """ 1419 # Terminal feedback 1420 print(f'\naton.api.pwx {__version__}\n' 1421 f'Creating Quantum ESPRESSO SCF input from previous relax calculation:\n' 1422 f'{relax_in}\n{relax_out}\n') 1423 folder_path = folder 1424 if not folder_path: 1425 folder_path = os.getcwd() 1426 relax_in = file.get(folder_path, relax_in) 1427 relax_out = file.get(folder_path, relax_out) 1428 data = read_dir(folder_path, relax_in, relax_out) 1429 # Create the scf.in from the previous relax.in 1430 scf_in = os.path.join(folder_path, 'scf.in') 1431 comment = f'! Automatic SCF input made with ATON {__version__}. https://pablogila.github.io/aton' 1432 edit.from_template(relax_in, scf_in, None, comment) 1433 scf_in = file.get(folder_path, scf_in) 1434 # Replace CELL_PARAMETERS, ATOMIC_POSITIONS, ATOMIC_SPECIES, alat, ibrav and calculation 1435 atomic_species = data['ATOMIC_SPECIES'] 1436 cell_parameters = data['CELL_PARAMETERS out'] 1437 atomic_positions = data['ATOMIC_POSITIONS out'] 1438 alat = data['Alat'] 1439 calculation = data['calculation'] 1440 set_value(scf_in, 'ATOMIC_SPECIES', atomic_species) 1441 set_value(scf_in, 'ATOMIC_POSITIONS', atomic_positions) 1442 set_value(scf_in, 'ibrav', 0) 1443 set_value(scf_in, 'calculation', "'scf'") 1444 if cell_parameters and alat: 1445 set_value(scf_in, 'CELL_PARAMETERS', cell_parameters) 1446 set_value(scf_in, 'celldm(1)', alat) 1447 elif calculation == 'vc-relax': 1448 raise ValueError(f'Missing lattice parameters from {calculation} calculation, CELL_PARAMETERS={cell_parameters}, celldm(1)={alat}') 1449 # Update user-specified values 1450 set_values(filepath=scf_in, update=update, indent=indent) 1451 # Terminal feedback 1452 print(f'Created input SCF file at:' 1453 f'{scf_in}\n') 1454 return None 1455 1456 1457def to_cartesian(filepath, coordinates) -> str: 1458 """Converts a given `cordinates` from crystal lattice vectors to cartesian. 1459 1460 Only for `ibrav=0`. Uses the cell parameters from the input `filepath`. 1461 Note that the result is not multiplied by `A` nor `celldm(1)`. 1462 """ 1463 #print(f'COORDINATES: {coordinates}') 1464 cell_base = _get_base_change_matrix(filepath) 1465 coords = _clean_coords(coordinates) 1466 coords = np.array(coords).transpose() 1467 converted_coords = np.matmul(cell_base, coords) 1468 converted_coords_row = converted_coords.transpose() 1469 final_coords = converted_coords_row.tolist() 1470 #print(f'FINAL_COORDINATES: {final_coords}') 1471 return final_coords 1472 1473 1474def from_cartesian(filepath, coordinates:list) -> str: 1475 """Converts a given `cordinates` from cartesian to the base of lattice vectors. 1476 1477 Only for `ibrav=0`. Uses the cell parameters from the input `filepath`. 1478 Note that the result is not divided by `A` nor `celldm(1)`. 1479 """ 1480 #print(f'COORDINATES: {coordinates}') # DEBUG 1481 cell_base = _get_base_change_matrix(filepath) 1482 cell_base_inverse = np.linalg.inv(cell_base) 1483 coords = _clean_coords(coordinates) 1484 coords = np.array(coords).transpose() 1485 converted_coords = np.matmul(cell_base_inverse, coords) 1486 converted_coords_row = converted_coords.transpose() 1487 final_coords = converted_coords_row.tolist() 1488 #print(f'FINAL_COORDINATES: {final_coords}') # DEBUG 1489 return final_coords 1490 1491 1492def _get_base_change_matrix(filepath): 1493 """Get the base change matrix, based on the cell parameters. Only for `ibrav=0`.""" 1494 content = read_in(filepath) 1495 # Get the base change matrix 1496 cell_parameters = content['CELL_PARAMETERS'] 1497 cell_parameters = cell_parameters[1:] # Remove header 1498 cell_coords = [] 1499 for parameter in cell_parameters: 1500 #print(parameter) # DEBUG 1501 coords = extract.coords(parameter) 1502 cell_coords.append(coords) 1503 cell_numpy = np.array(cell_coords) 1504 cell_base = cell_numpy.transpose() 1505 #print(f'BASE CHANGE MATRIX: {cell_base}') # DEBUG 1506 return cell_base 1507 1508 1509def _clean_coords(coordinates) -> list: 1510 """Make sure that `coordinates` is a list with 3 floats. Removes extra values if present.""" 1511 if isinstance(coordinates, str): 1512 coordinates = extract.coords(coordinates) 1513 if not isinstance(coordinates, list): 1514 raise ValueError("The coordinates must be a list of three floats, or a string as in 'Element x1 x2 x3'") 1515 if len(coordinates) < 3: 1516 raise ValueError("The coordinates must be a list of three floats, or a string as in 'Element x1 x2 x3'") 1517 if len(coordinates) > 3: 1518 coordinates = coordinates [:3] 1519 cleaned_coords = [] 1520 for coord in coordinates: 1521 cleaned_coords.append(float(coord)) 1522 return cleaned_coords 1523 1524 1525############################################################ 1526#################### COMMON VARIABLES #################### 1527############################################################ 1528 1529 1530pw_namelists = { 1531 '&CONTROL' : ['calculation', 'title', 'verbosity', 'restart_mode', 'wf_collect', 'nstep', 'iprint', 'tstress', 'tprnfor', 'dt', 'outdir', 'wfcdir', 'prefix', 'lkpoint_dir', 'max_seconds', 'etot_conv_thr', 'forc_conv_thr', 'disk_io', 'pseudo_dir', 'tefield', 'dipfield', 'lelfield', 'nberrycyc', 'lorbm', 'lberry', 'gdir', 'nppstr', 'gate', 'twochem', 'lfcp', 'trism'], 1532 # 1533 '&SYSTEM' : ['ibrav', 'celldm(1)', 'celldm(2)', 'celldm(3)', 'celldm(4)', 'celldm(5)', 'celldm(6)', 'A', 'B', 'C', 'cosAB', 'cosAC', 'cosBC', 'nat', 'ntyp', 'nbnd', 'nbnd_cond', 'tot_charge', 'starting_charge', 'tot_magnetization', 'starting_magnetization', 'ecutwfc', 'ecutrho', 'ecutfock', 'nr1', 'nr2', 'nr3', 'nr1s', 'nr2s', 'nr3s', 'nosym', 'nosym_evc', 'noinv', 'no_t_rev', 'force_symmorphic', 'use_all_frac', 'occupations', 'one_atom_occupations', 'starting_spin_angle', 'degauss_cond', 'nelec_cond', 'degauss', 'smearing', 'nspin', 'sic_gamma', 'pol_type', 'sic_energy', 'sci_vb', 'sci_cb', 'noncolin', 'ecfixed', 'qcutz', 'q2sigma', 'input_dft', 'ace', 'exx_fraction', 'screening_parameter', 'exxdiv_treatment', 'x_gamma_extrapolation', 'ecutvcut' 'nqx1', 'nqx2', 'nqx3', 'localization_thr', 'Hubbard_occ', 'Hubbard_alpha', 'Hubbard_beta', 'starting_ns_eigenvalue', 'dmft', 'dmft_prefix', 'ensemble_energies', 'edir', 'emaxpos', 'eopreg', 'eamp', 'angle1', 'angle2', 'lforcet', 'constrained_magnetization', 'fixed_magnetization', 'lambda', 'report', 'lspinorb', 'assume_isolated', 'esm_bc', 'esm_w', 'esm_efield', 'esm_nfit', 'lgcscf', 'gcscf_mu', 'gcscf_conv_thr', 'gcscf_beta', 'vdw_corr', 'london', 'london_s6', 'london_c6', 'london_rvdw', 'london_rcut', 'dftd3_version', 'dftd3_threebody', 'ts_vdw_econv_thr', 'ts_vdw_isolated', 'xdm', 'xdm_a1', 'xdm_a2', 'space_group', 'uniqueb', 'origin_choice', 'rhombohedral', 'zgate', 'relaxz', 'block', 'block_1', 'block_2', 'block_height', 'nextffield'], 1534 # 1535 '&ELECTRONS' : ['electron_maxstep', 'exx_maxstep', 'scf_must_converge', 'conv_thr', 'adaptive_thr', 'conv_thr_init', 'conv_thr_multi', 'mixing_mode', 'mixing_beta', 'mixing_ndim', 'mixing_fixed_ns', 'diagonalization', 'diago_thr_init', 'diago_cg_maxiter', 'diago_ppcg_maxiter', 'diago_david_ndim', 'diago_rmm_ndim', 'diago_rmm_conv', 'diago_gs_nblock', 'diago_full_acc', 'efield', 'efield_cart', 'efield_phase', 'startingpot', 'startingwfc', 'tqr', 'real_space'], 1536 # 1537 '&IONS' : ['ion_positions', 'ion_velocities', 'ion_dynamics', 'pot_extrapolation', 'wfc_extrapolation', 'remove_rigid_rot', 'ion_temperature', 'tempw', 'tolp', 'delta_t', 'nraise', 'refold_pos', 'upscale', 'bfgs_ndim', 'trust_radius_max', 'trust_radius_min', 'trust_radius_ini', 'w_1', 'w_2', 'fire_alpha_init', 'fire_falpha', 'fire_nmin', 'fire_f_inc', 'fire_f_dec', 'fire_dtmax'], 1538 # 1539 '&CELL' : ['cell_dynamics', 'press', 'wmass', 'cell_factor', 'press_conv_thr' 'cell_dofree'], 1540 # 1541 '&FCP' : ['fcp_mu', 'fcp_dynamics', 'fcp_conv_thr', 'fcp_ndiis', 'fcp_mass','fcp_velocity', 'fcp_temperature', 'fcp_tempw', 'fcp_tolp ', 'fcp_delta_t', 'fcp_nraise', 'freeze_all_atoms'], 1542 # 1543 '&RISM' : ['nsolv', 'closure', 'tempv', 'ecutsolv', 'solute_lj', 'solute_epsilon', 'solute_sigma', 'starting1d', 'starting3d', 'smear1d', 'smear3d', 'rism1d_maxstep', 'rism3d_maxstep', 'rism1d_conv_thr', 'rism3d_conv_thr', 'mdiis1d_size', 'mdiis3d_size', 'mdiis1d_step', 'mdiis3d_step', 'rism1d_bond_width', 'rism1d_dielectric', 'rism1d_molesize', 'rism1d_nproc', 'rism3d_conv_level', 'rism3d_planar_average', 'laue_nfit', 'laue_expand_right', 'laue_expand_left', 'laue_starting_right', 'laue_starting_left', 'laue_buffer_right', 'laue_buffer_left', 'laue_both_hands', 'laue_wall', 'laue_wall_z', 'laue_wall_rho', 'laue_wall_epsilon', 'laue_wall_sigma', 'laue_wall_lj6'], 1544} 1545"""Dictionary with all possible NAMELISTs as keys, and the corresponding variables as values.""" 1546 1547 1548pw_cards = { 1549 'ATOMIC_SPECIES' : ['X', 'Mass_X', 'PseudoPot_X'], 1550 # 1551 'ATOMIC_POSITIONS' : ['X', 'x', 'y', 'z', 'if_pos(1)', 'if_pos(2)', 'if_pos(3)'], 1552 # 1553 'K_POINTS' : ['nks', 'xk_x', 'xk_y', 'xk_z', 'wk', 'nk1', 'nk2', 'nk3', 'sk1', 'sk2', 'sk3'], 1554 # 1555 'ADDITIONAL_K_POINTS' : ['nks_add', 'k_x', 'k_y', 'k_z', 'wk_'], 1556 # 1557 'CELL_PARAMETERS': ['v1', 'v2', 'v3'], 1558 # 1559 'CONSTRAINTS' : ['nconstr', 'constr_tol', 'constr_type', 'constr(1)', 'constr(2)', 'constr(3)', 'constr(4)', 'constr_target'], 1560 # 1561 'OCCUPATIONS': ['f_inp1', 'f_inp2'], 1562 # 1563 'ATOMIC_VELOCITIES' : ['V', 'vx', 'vy', 'vz'], 1564 # 1565 'ATOMIC_FORCES' : ['X', 'fx', 'fy', 'fz'], 1566 # 1567 'SOLVENTS' : ['X', 'Density', 'Molecule', 'X', 'Density_Left', 'Density_Right', 'Molecule'], 1568 # 1569 'HUBBARD' : ['label(1)-manifold(1)', 'u_val(1)', 'label(1)-manifold(1)', 'j0_val(1)', 'paramType(1)', 'label(1)-manifold(1)', 'paramValue(1)', 'label(I)-manifold(I)', 'u_val(I)', 'label(I)-manifold(I)', 'j0_val(I)', 'label(I)-manifold(I)', 'label(J)-manifold(J)', 'I', 'J', 'v_val(I,J)'], 1570 # Extra card for output CELL_PARAMETERS 1571 'CELL_PARAMETERS out': ['v1', 'v2', 'v3'], 1572 # Extra card for output ATOMIC_POSITIONS 1573 'ATOMIC_POSITIONS out' : ['X', 'x', 'y', 'z', 'if_pos(1)', 'if_pos(2)', 'if_pos(3)'], 1574} 1575"""Dictionary with every possible CARDs as keys, and the corresponding variables as values.""" 1576 1577 1578_pw_int_values = ['max_seconds', 'nstep', 'ibrav', 'nat', 'ntyp', 'dftd3_version', 'electron_maxstep'] 1579"""Values from any namelist that must be integers""" 1580 1581 1582_upper_cards = pw_cards.keys() 1583_all_cards = [] 1584for _upper_card in _upper_cards: 1585 _all_cards.append(_upper_card.lower()) 1586_all_cards.extend(_upper_cards) 1587_all_cards_regex = '|'.join(_all_cards) 1588"""Regex string that matches all CARDS present in the file.""" 1589_all_cards_regex = rf'(?!\s*!\s*)({_all_cards_regex})(?!\s*=)'
72def read_in(filepath) -> dict: 73 """Reads a Quantum ESPRESSO input `filepath` and returns the values as a dict. 74 75 Dict keys are named after the corresponding variable. 76 CARDS are returned as lists, and contain the 77 title card + parameters in the first item. 78 """ 79 file_path = file.get(filepath) 80 data: dict = {} 81 # First get the values from the namelists 82 lines = find.lines(file_path, '=') 83 for line in lines: 84 line.strip() 85 var, value = line.split('=', 1) 86 var = var.strip() 87 value = value.strip() 88 if var.startswith('!'): 89 continue 90 try: 91 value_float = value.replace('d', 'e') 92 value_float = value_float.replace('D', 'e') 93 value_float = value_float.replace('E', 'e') 94 value_float = float(value_float) 95 value = value_float 96 if var in _pw_int_values: # Keep ints as int 97 value = int(value) 98 except ValueError: 99 pass # Then it is a string 100 data[var] = value 101 # Try to find all the cards. Card titles will be saved in the 0 position of each result. 102 for card in pw_cards.keys(): 103 card_lower = card.lower() 104 card_uncommented = rf'(?!\s*!\s*)({card}|{card_lower})' # Legacy regex 105 card_content = find.between(filepath=file_path, key1=card_uncommented, key2=_all_cards_regex, include_keys=True, match=-1, regex=True) 106 if not card_content: 107 continue 108 # If found, clean and normalise the card's content 109 card_content = card_content.splitlines() 110 card_content = normalize_card(card_content) 111 # Ignore OCCUPATIONS card if not required to avoid saving nonsense data 112 if card_lower == 'occupations': 113 if data.get('occupations') is None or 'from_input' not in data.get('occupations'): 114 continue 115 data[card] = card_content 116 # If there are CELL_PARAMETERS, check if we can extract the alat to celldm(1). 117 if 'CELL_PARAMETERS' in data.keys(): 118 if 'alat' in data['CELL_PARAMETERS'][0]: 119 alat = extract.number(data['CELL_PARAMETERS'][0]) 120 if alat: # This overwrites any possible celldm(1) previously defined! 121 data['celldm(1)'] = alat 122 data['CELL_PARAMETERS'][0] = 'CELL_PARAMETERS alat' 123 return data
Reads a Quantum ESPRESSO input filepath and returns the values as a dict.
Dict keys are named after the corresponding variable. CARDS are returned as lists, and contain the title card + parameters in the first item.
126def read_out(filepath) -> dict: 127 """Reads a Quantum ESPRESSO output `filepath`, returns a dict with the output keys. 128 129 The output keys are: 130 `'Energy'` (Ry), `'Total force'` (float), `'Total SCF correction'` (float), 131 `'Runtime'` (str), `'Success'` (bool), `'JOB DONE'` (bool), 132 `'BFGS converged'` (bool), `'BFGS failed'` (bool), 133 `'Maxiter reached'` (bool), `'Error'` (str), `'Efermi'` (eV), 134 `'Alat'` (bohr), `'Volume'` (a.u.^3), `'Volume AA'` (AA^3), 135 `'Density'` (g/cm^3), `'Pressure'` (kbar), 136 `'CELL_PARAMETERS out'` (list of str), `'ATOMIC_POSITIONS out'` (list of str), 137 `'cosBC out'`, `'cosAC out'`, `'cosAB out'` (float), 138 `'A out'`, `'B out'`, `'C out'` (angstrom), 139 """ 140 file_path = file.get(filepath) 141 142 energy_key = '! total energy' 143 force_key = 'Total force' 144 scf_key = 'Total SCF correction' 145 pressure_key = 'P=' 146 efermi_key = 'the Fermi energy is' 147 time_key = 'PWSCF' 148 time_stop_key = 'CPU' 149 job_done_key = 'JOB DONE.' 150 bfgs_converged_key = 'bfgs converged' 151 bfgs_failed_key = 'bfgs failed' 152 maxiter_reached_key = r'(Maximum number of iterations reached|maximum number of steps has been reached)' 153 error_key = 'Error in routine' 154 error_failed_line = 'pw.x: Failed' 155 cell_parameters_key = 'CELL_PARAMETERS' 156 atomic_positions_key = 'ATOMIC_POSITIONS' 157 158 energy_line = find.lines(file_path, energy_key, -1) 159 force_line = find.lines(file_path, force_key, -1) 160 pressure_line = find.lines(file_path, pressure_key, -1) 161 efermi_line = find.lines(file_path, efermi_key, -1) 162 time_line = find.lines(file_path, time_key, -1) 163 job_done_line = find.lines(file_path, job_done_key, -1) 164 bfgs_converged_line = find.lines(file_path, bfgs_converged_key, -1) 165 bfgs_failed_line = find.lines(file_path, bfgs_failed_key, -1) 166 maxiter_reached_line = find.lines(file_path, maxiter_reached_key, -1, regex=True) 167 error_line = find.lines(file_path, error_key, -1, 1, True) 168 error_failed_line = find.lines(file_path, error_failed_line, -1) 169 170 energy: float = None 171 force: float = None 172 scf: float = None 173 pressure: float = None 174 efermi: float = None 175 time: str = None 176 job_done: bool = False 177 bfgs_converged: bool = False 178 bfgs_failed: bool = False 179 maxiter_reached: bool = False 180 error: str = '' 181 success: bool = False 182 183 if energy_line: 184 energy = extract.number(energy_line[0], energy_key) 185 if force_line: 186 force = extract.number(force_line[0], force_key) 187 scf = extract.number(force_line[0], scf_key) 188 if pressure_line: 189 pressure = extract.number(pressure_line[0], pressure_key) 190 if efermi_line: 191 efermi = extract.number(efermi_line[0], efermi_key) 192 if time_line: 193 time = extract.string(time_line[0], time_key, time_stop_key) 194 if job_done_line: 195 job_done = True 196 if bfgs_converged_line: 197 bfgs_converged = True 198 if bfgs_failed_line: 199 bfgs_failed = True 200 if maxiter_reached_line: 201 maxiter_reached = True 202 if error_line: 203 error = error_line[1].strip() 204 elif error_failed_line: 205 error = error_failed_line[0].strip() 206 207 # Was the calculation successful? 208 if job_done and not bfgs_failed and not maxiter_reached and not error: 209 success = True 210 211 # CELL_PARAMETERS and ATOMIC_POSITIONS 212 alat = None 213 volume = None 214 volume_AA = None 215 density = None 216 cell_parameters = None 217 atomic_positions = None 218 cell_parameters_raw = [] 219 atomic_positions_raw = [] 220 coordinates_raw = find.between(file_path, 'Begin final coordinates', 'End final coordinates', False, -1, False) 221 if coordinates_raw: 222 coordinates_raw = coordinates_raw.splitlines() 223 append_cell = False 224 append_positions = False 225 for line in coordinates_raw: 226 line = line.strip() 227 if cell_parameters_key in line: 228 append_cell = True 229 append_positions = False 230 elif atomic_positions_key in line: 231 append_cell = False 232 append_positions = True 233 elif 'volume' in line: 234 volume = extract.number(line, 'volume') 235 volume_AA = volume * (_BOHR_TO_ANGSTROM ** 3) 236 elif 'density' in line: 237 density = extract.number(line, 'density') 238 if line == '' or line.startswith('!'): 239 continue 240 if append_cell: 241 cell_parameters_raw.append(line) 242 elif append_positions: 243 atomic_positions_raw.append(line) 244 atomic_positions = normalize_card(atomic_positions_raw) 245 if cell_parameters_raw: 246 cell_parameters = normalize_card(cell_parameters_raw) 247 if 'alat' in cell_parameters[0]: 248 alat = extract.number(cell_parameters[0], 'alat') 249 # If not found, at least try to keep the latest iteration 250 else: 251 _nat_raw = find.lines(file_path, 'number of atoms/cell', 1) 252 if _nat_raw: 253 _nat = int(extract.number(_nat_raw[0], 'number of atoms/cell')) 254 atomic_positions_raw = find.lines(file_path, atomic_positions_key, matches=-1, additional=_nat, split=True) 255 if atomic_positions_raw: 256 atomic_positions = normalize_card(atomic_positions_raw) 257 cell_parameters_raw = find.lines(file_path, cell_parameters_key, matches=-1, additional=3, split=True) 258 if cell_parameters_raw: 259 cell_parameters = normalize_card(cell_parameters_raw) 260 if 'alat' in cell_parameters[0]: 261 alat = extract.number(cell_parameters[0], 'alat') 262 263 output = { 264 'Energy' : energy, 265 'Total force' : force, 266 'Total SCF correction' : scf, 267 'Runtime' : time, 268 'JOB DONE' : job_done, 269 'BFGS converged' : bfgs_converged, 270 'BFGS failed' : bfgs_failed, 271 'Maxiter reached' : maxiter_reached, 272 'Error' : error, 273 'Success' : success, 274 'CELL_PARAMETERS out' : cell_parameters, 275 'ATOMIC_POSITIONS out' : atomic_positions, 276 'Alat' : alat, 277 'Volume' : volume, 278 'Volume AA' : volume_AA, 279 'Density' : density, 280 'Pressure' : pressure, 281 'Efermi' : efermi, 282 } 283 284 # Extract lattice parameters A, B, C, celldm(i) and cosines 285 if alat: 286 consts = consts_from_cell_parameters(cell_parameters, alat) 287 else: 288 consts = consts_from_cell_parameters(cell_parameters) 289 consts_out = {key + ' out': value for key, value in consts.items()} 290 output.update(consts_out) 291 292 return output
Reads a Quantum ESPRESSO output filepath, returns a dict with the output keys.
The output keys are:
'Energy' (Ry), 'Total force' (float), 'Total SCF correction' (float),
'Runtime' (str), 'Success' (bool), 'JOB DONE' (bool),
'BFGS converged' (bool), 'BFGS failed' (bool),
'Maxiter reached' (bool), 'Error' (str), 'Efermi' (eV),
'Alat' (bohr), 'Volume' (a.u.^3), 'Volume AA' (AA^3),
'Density' (g/cm^3), 'Pressure' (kbar),
'CELL_PARAMETERS out' (list of str), 'ATOMIC_POSITIONS out' (list of str),
'cosBC out', 'cosAC out', 'cosAB out' (float),
'A out', 'B out', 'C out' (angstrom),
295def read_dir( 296 folder=None, 297 in_str:str='.in', 298 out_str:str='.out' 299 ) -> dict: 300 """Takes a `folder` from a QE pw.x calculation, returns a dict with input and output values. 301 302 Input and output files are determined automatically, 303 but must be specified with `in_str` and `out_str` 304 if more than one file ends with `.in` or `.out`. 305 """ 306 folder = file.get_dir(folder) 307 input_file = file.get(folder, in_str) 308 output_file = file.get(folder, out_str) 309 if not input_file and not output_file: 310 return None 311 if input_file: 312 dict_in = read_in(input_file) 313 if not output_file: 314 return dict_in 315 if output_file: 316 dict_out = read_out(output_file) 317 if not input_file: 318 return dict_out 319 # Merge both dictionaries 320 merged_dict = {**dict_in, **dict_out} 321 return merged_dict
Takes a folder from a QE pw.x calculation, returns a dict with input and output values.
Input and output files are determined automatically,
but must be specified with in_str and out_str
if more than one file ends with .in or .out.
324def read_dirs( 325 folder, 326 in_str:str='.in', 327 out_str:str='.out', 328 include=None, 329 exclude=None, 330 separator='_', 331 type_index=0, 332 id_index=1 333 ) -> None: 334 """Reads recursively QE pw.x calculations from all the subfolders inside the given `directory`. 335 336 Results are saved to CSV files inside the current directory. 337 Input and output files are determined automatically, but must be specified with 338 `in_str` and `out_str` if more than one file ends with `.in` or `.out`. 339 340 Only folders containing all strings in the `include` list will be included. 341 Folders containing any string from the `exclude` list will be ignored. 342 343 To properly group the calculations per type, saving separated CSVs for each calculation type, 344 you can modify `separator` ('_' by default), `type_index` (0) and `id_index` (1). 345 With these default values, a subfolder named './CalculationType_CalculationID_AdditionalText/' 346 will be interpreted as follows: 347 - Calculation type: 'CalculationType' (The output CSV will be named after this) 348 - CalculationID: 'CalculationID' (Stored in the 'ID' column of the resulting dataframe) 349 350 If the detection fails, the subfolder name will be used for the CSV file. 351 """ 352 print(f'Reading all Quantum ESPRESSO calculations from {folder} ...') 353 folders = file.get_list(folder, include=include, exclude=exclude, only_folders=True) 354 if not folders: 355 raise FileNotFoundError('The directory is empty!') 356 # Separate calculations by their title in an array 357 calc_types = [] 358 for f in folders: 359 folder_name = os.path.basename(f) 360 try: 361 calc_name = folder_name.split(separator)[type_index] 362 except: 363 calc_name = folder_name 364 if not calc_name in calc_types: 365 calc_types.append(calc_name) 366 len_folders = len(folders) 367 total_success_counter = 0 368 for calc in calc_types: 369 len_calcs = 0 370 success_counter = 0 371 results = pd.DataFrame() 372 for f in folders: 373 if not calc in f: 374 continue 375 len_calcs += 1 376 folder_name = os.path.basename(f) 377 try: 378 calc_id = folder_name.split(separator)[id_index] 379 except: 380 calc_id = folder_name 381 dictionary = read_dir(f, in_str, out_str) 382 #df = pd.DataFrame.from_dict(dictionary) 383 df = pd.DataFrame({k: [v] for k, v in dictionary.items()}) if dictionary else None 384 if df is None: 385 continue 386 # Join input and output in the same dataframe 387 df.insert(0, 'ID', calc_id) 388 df = df.dropna(axis=1, how='all') 389 results = pd.concat([results, df], axis=0, ignore_index=True) 390 if df['Success'][0]: 391 success_counter += 1 392 total_success_counter += 1 393 results.to_csv(os.path.join(folder, calc+'.csv')) 394 print(f'Saved to CSV: {calc} ({success_counter} successful calculations out of {len_calcs})') 395 print(f'Total successful calculations: {total_success_counter} out of {len_folders}')
Reads recursively QE pw.x calculations from all the subfolders inside the given directory.
Results are saved to CSV files inside the current directory.
Input and output files are determined automatically, but must be specified with
in_str and out_str if more than one file ends with .in or .out.
Only folders containing all strings in the include list will be included.
Folders containing any string from the exclude list will be ignored.
To properly group the calculations per type, saving separated CSVs for each calculation type,
you can modify separator ('_' by default), type_index (0) and id_index (1).
With these default values, a subfolder named './CalculationType_CalculationID_AdditionalText/'
will be interpreted as follows:
- Calculation type: 'CalculationType' (The output CSV will be named after this)
- CalculationID: 'CalculationID' (Stored in the 'ID' column of the resulting dataframe)
If the detection fails, the subfolder name will be used for the CSV file.
398def set_value( 399 filepath, 400 key:str, 401 value, 402 indent:str=' ', 403 ) -> None: 404 """Replace the `value` of a `key` parameter in an input `filepath`. 405 406 Delete parameters with `value=''`. 407 Remember to include the single quotes `'` on values that use them. 408 409 Updating 'ATOMIC_POSITIONS' updates 'nat' automatically, 410 and updating 'ATOMIC_SPECIES' updates 'ntyp'. 411 412 Optionally change indentation with `indent`, 2 spaces by default. 413 """ 414 key = key.strip() 415 file_path = file.get(filepath) 416 input_old = read_in(file_path) 417 # Present keys uppercase 418 present_keys_upper = [] 419 for present_key in input_old.keys(): 420 present_keys_upper.append(present_key.upper()) 421 # All namelist values 422 pw_values = [] 423 for namelist_values in pw_namelists.values(): 424 pw_values.extend(namelist_values) 425 # All cards 426 pw_cards_upper = [] # Don't forget about these! 427 for card in pw_cards.keys(): 428 pw_cards_upper.append(card.upper()) 429 # Check if it's a namelist 430 if key in pw_values: 431 if key in input_old.keys(): 432 _update_value(filepath, key, value, indent) 433 else: # Write the value from scratch 434 _add_value(filepath, key, value, indent) 435 # Check if it's a card 436 elif key.upper() in pw_cards_upper: 437 if key.upper() in present_keys_upper: 438 _update_card(filepath, key, value, indent) 439 else: 440 _add_card(filepath, key, value, indent) 441 else: 442 raise ValueError(f'Unrecognised key: {key}') 443 return None
Replace the value of a key parameter in an input filepath.
Delete parameters with value=''.
Remember to include the single quotes ' on values that use them.
Updating 'ATOMIC_POSITIONS' updates 'nat' automatically, and updating 'ATOMIC_SPECIES' updates 'ntyp'.
Optionally change indentation with indent, 2 spaces by default.
446def set_values( 447 filepath, 448 update:dict, 449 indent:str=' ', 450 ) -> None: 451 """Replace multiple values of an input `filepath` with an `update` dict. 452 453 Calls `set_value` recursively. If `update` is empty, nothig will happen. 454 """ 455 if not update: 456 return None 457 for key, value in update.items(): 458 set_value(filepath=filepath, key=key, value=value, indent=indent) 459 return None
Replace multiple values of an input filepath with an update dict.
Calls set_value recursively. If update is empty, nothig will happen.
679def add_atom(filepath, position, indent=' ') -> None: 680 """Adds an atom in a given input `filepath` at a specified `position`. 681 682 Position must be a string or a list, as follows: 683 `"specie:str float float float"` or `[specie:str, float, float, float]`. 684 685 This method updates automatically other related values, 686 such as 'ntyp' when updating ATOMIC_SPECIES, etc. 687 688 Optionally change indentation with `indent`, 2 spaces by default. 689 """ 690 new_atom = position 691 if isinstance(position, list): 692 if not len(position) == 4 or not isinstance(position[0], str): 693 raise ValueError('If your atomic position is a list, it must contain the atom type and the three coords, as in [str, str/float, str/float, str/float]') 694 new_atom = ' '.join(str(x) for x in position) 695 elif not isinstance(position, str): 696 raise ValueError(f'The specified position must be a list of size 4 (atom type and three coordinates) or an equivalent string! Your position was:\n{coords}') 697 # Let's check that our new_atom makes sense 698 atom = extract.element(new_atom) 699 coords = extract.coords(new_atom) 700 if not atom: 701 raise ValueError(f'The specified position does not contain an atom at the beginning! Your position was:\n{position}') 702 if len(coords) < 3: 703 raise ValueError(f'Your position has len(coordinates) < 3, please check it.\nYour position was: {position}\nCoordinates detected: {coords}') 704 if len(coords) > 3: 705 coords = coords[:3] 706 new_atom = f'{atom} {coords[0]} {coords[1]} {coords[2]}' 707 # Get the values from the file 708 values = read_in(filepath) 709 atomic_positions = values['ATOMIC_POSITIONS'] 710 atomic_positions.append(new_atom) 711 # Update ATOMIC_POSITIONS. nat should be updated automatically. 712 set_value(filepath=filepath, key='ATOMIC_POSITIONS', value=atomic_positions, indent=indent) 713 # We might have to update ATOMIC_SPECIES! 714 atomic_species = values['ATOMIC_SPECIES'] 715 is_atom_missing = True 716 for specie in atomic_species[1:]: 717 if atom == extract.element(specie): 718 is_atom_missing = False 719 break 720 if is_atom_missing: # Update ATOMIC_SPECIES. ntyp should be updated automatically. 721 mass = periodictable.elements.symbol(atom).mass 722 atomic_species.append(f'{atom} {mass} {atom}.upf') 723 set_value(filepath=filepath, key='ATOMIC_SPECIES', value=atomic_species, indent=indent) 724 return None
Adds an atom in a given input filepath at a specified position.
Position must be a string or a list, as follows:
"specie:str float float float" or [specie:str, float, float, float].
This method updates automatically other related values, such as 'ntyp' when updating ATOMIC_SPECIES, etc.
Optionally change indentation with indent, 2 spaces by default.
727def get_atom( 728 filepath:str, 729 position:list, 730 precision:int=3, 731 return_anyway:bool=False, 732 ) -> str: 733 """Takes the approximate `position` of an atom, and returns the full line from the input `filepath`. 734 735 It compares the atomic positions rounded up to the specified `precision` decimals. 736 If `return_anyway = True`, ignores errors and returns an empty string. 737 """ 738 # Check that the coordinates are valid 739 if isinstance(position, str): 740 coordinates = extract.coords(position) 741 elif isinstance(position, list): 742 coordinates = position 743 if len(position) == 1 and isinstance(position[0], str): # In case someone like me introduces ['x, y, z'] 744 coordinates = extract.coords(position[0]) 745 else: 746 if return_anyway: 747 return '' 748 raise ValueError(f'The atomic position must be a list or a string! Yours was:\n{position}\nDetected coordinates:\n{coordinates}') 749 if len(coordinates) < 3: 750 if return_anyway: 751 return '' 752 raise ValueError(f'Atomic position has less that 3 coordinates! Yours had len={len(coordinates)}:\n{position}\nDetected coordinates:\n{coordinates}') 753 if len(coordinates) > 3: 754 coordinates = coordinates[:3] 755 # Round the input position up to the specified precision 756 coordinates_rounded = [] 757 for coordinate in coordinates: 758 coordinates_rounded.append(round(coordinate, precision)) 759 # Compare the rounded coordinates with the atomic positions 760 content = read_in(filepath) 761 if not 'ATOMIC_POSITIONS' in content.keys(): 762 if return_anyway: 763 return '' 764 raise ValueError(f'ATOMIC_POSITIONS missing in {filepath}') 765 atomic_positions = content['ATOMIC_POSITIONS'][1:] # Remove header 766 matched_pos = None 767 matched_index = None 768 for i, atomic_position in enumerate(atomic_positions): 769 coords = extract.coords(atomic_position) 770 coords_rounded = [] 771 for pos in coords: 772 coords_rounded.append(round(pos, precision)) 773 if coordinates_rounded == coords_rounded: 774 if matched_pos: # There was a previous match! 775 if return_anyway: 776 return '' 777 raise ValueError(f'More than one matching position found! Try again with more precision.\nSearched coordinates: {coordinates_rounded}') 778 matched_pos = atomic_position 779 matched_index = i 780 if not matched_pos: 781 if return_anyway: 782 return '' 783 raise ValueError(f'No matching position found! Try again with a less tight precision parameter.\nSearched coordinates: {coordinates_rounded}') 784 # We must get the literal line, not the normalized one! 785 atomic_positions_uncommented = rf'^\s*\b(ATOMIC_POSITIONS|atomic_positions)' 786 #atomic_positions_uncommented = rf'(?!\s*!\s*)(ATOMIC_POSITIONS|atomic_positions)' # Legacy regex 787 atomic_positions_lines = find.between(filepath=filepath, key1=atomic_positions_uncommented, key2=_all_cards_regex, include_keys=False, match=-1, regex=True) 788 # Remove commented or empty lines 789 atomic_positions_lines = atomic_positions_lines.splitlines() 790 atomic_positions_cleaned = [] 791 for line in atomic_positions_lines: 792 if line == '' or line.startswith('!') or line.startswith('#'): 793 continue 794 atomic_positions_cleaned.append(line) 795 matched_line = atomic_positions_cleaned[matched_index] 796 return matched_line.strip()
Takes the approximate position of an atom, and returns the full line from the input filepath.
It compares the atomic positions rounded up to the specified precision decimals.
If return_anyway = True, ignores errors and returns an empty string.
799def normalize_card(card:list, indent:str='') -> list: 800 """Take a matched card, and return it in a normalised format. 801 802 Optionally change indentation with `indent`, 0 spaces by default. 803 """ 804 # Make sure it is a list! 805 if isinstance(card, str): 806 card = card.split('\n') 807 if not isinstance(card, list): 808 raise ValueError(f"Card must be a string or a list of strings! Yours was:\n{card}") 809 # Keep the header 810 cleaned_content = [card[0].strip()] 811 for line in card[1:]: 812 line = line.strip() 813 if line == '' or line.startswith('!') or line.startswith('#'): 814 continue 815 elif any(key in line.upper() for key in pw_cards.keys()): 816 break 817 items = line.split() 818 cleaned_line = f'{indent}{items[0].strip()}' # Add the specified intent at the start of the line 819 for item in items[1:]: 820 item = item.strip() 821 cleaned_line = cleaned_line + ' ' + item # Three spaces between elements 822 cleaned_content.append(cleaned_line) 823 # Perform extra normalization for some CARDs 824 if any(key in cleaned_content[0] for key in ['cell_parameters', 'CELL_PARAMETERS']): 825 cleaned_content = _normalize_cell_parameters(cleaned_content, indent) 826 elif any(key in cleaned_content[0] for key in ['atomic_positions', 'ATOMIC_POSITIONS']): 827 cleaned_content = _normalize_atomic_positions(cleaned_content, indent) 828 elif any(key in cleaned_content[0] for key in ['atomic_species', 'ATOMIC_SPECIES']): 829 cleaned_content = _normalize_atomic_species(cleaned_content, indent) 830 elif any(key in cleaned_content[0] for key in ['k_points', 'K_POINTS']): 831 cleaned_content = _normalize_k_points(cleaned_content) 832 return cleaned_content
Take a matched card, and return it in a normalised format.
Optionally change indentation with indent, 0 spaces by default.
983def count_elements(atomic_positions) -> dict: 984 """Takes ATOMIC_POSITIONS, returns a dict as `{element : number of atoms}`""" 985 if isinstance(atomic_positions, str): 986 atomic_positions.split() 987 if not isinstance(atomic_positions, list): 988 raise ValueError(f'To count the elements, atomic_positions must be a list or a str! Yours was:\n{atomic_positions}') 989 elements = {} 990 if any(header in atomic_positions[0] for header in ['ATOMIC_POSITIONS', 'atomic_positions']): 991 atomic_positions = atomic_positions[1:] 992 for line in atomic_positions: 993 element = extract.element(line) 994 if not element: 995 continue 996 if element in elements.keys(): 997 elements[element] = elements[element] + 1 998 else: 999 elements[element] = 1 1000 return elements
Takes ATOMIC_POSITIONS, returns a dict as {element : number of atoms}
1003def consts_from_cell_parameters( 1004 cell_parameters: list, 1005 AA: float | None = None, 1006 bohr: float | None = None, 1007 tol: float = 1e-4, 1008 ) -> dict: 1009 """Calculates the lattice parameters from CELL_PARAMETERS and determines Bravais lattice type. 1010 1011 Takes the normalized `cell_parameters` card, 1012 and optional `AA` (angstrom) or `bohr` values for the alat parameter. 1013 Automatically detects the ibrav Bravais lattice type from the cell vectors, 1014 with a default tolerance of `tol=1e-4`. 1015 1016 Returns a dictionary with the fundamental lattice parameters: 1017 `'A'`, `'B'`, `'C'`, (angstrom) `'alpha'`, `'beta'`, `'gamma'` (degrees), 1018 `'cosBC'`, `'cosAC'`, `'cosAB'`, `'Volume cell'` (AA^3), 1019 `'ibrav'`, `'ibrav name'`. 1020 """ 1021 # Check that the input is valid 1022 if not cell_parameters: 1023 return { 1024 'A': None, 'B': None, 'C': None, 1025 'alpha': None, 'beta': None, 'gamma': None, 1026 'cosBC': None, 'cosAC': None, 'cosAB': None, 1027 'Volume cell': None 1028 } 1029 if len(cell_parameters) < 4: 1030 raise ValueError("Input list is too short or empty for cell parameters.") 1031 # Check that only one of AA or bohr is provided 1032 if AA is not None and bohr is not None: 1033 raise ValueError("Only one of 'AA' or 'bohr' arguments can be provided, not both.") 1034 1035 header = cell_parameters[0].lower() 1036 scaling_factor = 1.0 1037 # Determine scaling factor based on the card header 1038 if 'bohr' in header: # Convert from Bohr to Angstrom for all calculations 1039 scaling_factor = _BOHR_TO_ANGSTROM 1040 elif 'angstrom' in header or 'ang' in header: # No scaling needed 1041 pass 1042 elif 'alat' in header: # Extract alat from header or use provided AA/bohr 1043 alat_from_header = extract.number(cell_parameters[0]) 1044 if alat_from_header is not None: # Convert from bohr to angstrom 1045 scaling_factor = alat_from_header * _BOHR_TO_ANGSTROM 1046 elif AA is not None: 1047 if AA <= tol: 1048 raise ValueError(f"CELL_PARAMETERS card is 'alat' but AA = {AA} < {tol}.") 1049 scaling_factor = AA # Use AA directly in Angstrom 1050 elif bohr is not None: 1051 if bohr <= tol: 1052 raise ValueError(f"CELL_PARAMETERS card is 'alat' but bohr = {bohr} < {tol}.") 1053 scaling_factor = bohr * _BOHR_TO_ANGSTROM # Convert Bohr to Angstrom 1054 else: 1055 raise ValueError("CELL_PARAMETERS card is 'alat' but no value could be determined.") 1056 else: 1057 raise ValueError("CELL_PARAMETERS header should specify units in bohr, angstom or alat.") 1058 1059 # Extract and scale vectors 1060 raw_vectors = [] 1061 for line in cell_parameters[1:4]: 1062 coords_list = extract.coords(line) 1063 if len(coords_list) != 3: 1064 raise ValueError(f"CELL_PARAMETERS should only have 3 components per line:\n {coords_list}") 1065 raw_vectors.append(coords_list) 1066 1067 # Convert to NumPy array for 3x3 matrix operations 1068 M = np.array(raw_vectors) 1069 M *= scaling_factor # Now all vectors are in Angstrom 1070 1071 # Scaled vectors 1072 a_vec, b_vec, c_vec = M[0], M[1], M[2] 1073 1074 # Lengths (in Angstrom) 1075 a_len = np.linalg.norm(a_vec) 1076 b_len = np.linalg.norm(b_vec) 1077 c_len = np.linalg.norm(c_vec) 1078 1079 # Check for zero length vectors 1080 if a_len <= tol or b_len <= tol or c_len <= tol: 1081 raise ZeroDivisionError("One or more cell vectors have near-zero length after scaling.") 1082 1083 # Cosines 1084 cosBC = np.dot(b_vec, c_vec) / (b_len * c_len) 1085 cosAC = np.dot(a_vec, c_vec) / (a_len * c_len) 1086 cosAB = np.dot(a_vec, b_vec) / (a_len * b_len) 1087 1088 # Angles in degrees from the cosines 1089 # Using np.clip to avoid numerical issues with arccos 1090 alpha_deg = np.degrees(np.arccos(np.clip(cosBC, -1.0, 1.0))) 1091 beta_deg = np.degrees(np.arccos(np.clip(cosAC, -1.0, 1.0))) 1092 gamma_deg = np.degrees(np.arccos(np.clip(cosAB, -1.0, 1.0))) 1093 1094 consts = { 1095 'A': float(a_len), 1096 'B': float(b_len), 1097 'C': float(c_len), 1098 'alpha': float(alpha_deg), 1099 'beta': float(beta_deg), 1100 'gamma': float(gamma_deg), 1101 'cosBC': float(cosBC), 1102 'cosAC': float(cosAC), 1103 'cosAB': float(cosAB), 1104 } 1105 1106 # Additional values for ibrav detection 1107 temp = { 1108 'a_vec': a_vec.tolist(), 1109 'b_vec': b_vec.tolist(), 1110 'c_vec': c_vec.tolist(), 1111 'a_len': float(a_len), 1112 'b_len': float(b_len), 1113 'c_len': float(c_len), 1114 } 1115 consts_temp = deepcopy(consts) 1116 consts_temp.update(temp) 1117 ibrav = _ibrav_from_consts(consts_temp, tol) 1118 consts.update(ibrav) 1119 return consts
Calculates the lattice parameters from CELL_PARAMETERS and determines Bravais lattice type.
Takes the normalized cell_parameters card,
and optional AA (angstrom) or bohr values for the alat parameter.
Automatically detects the ibrav Bravais lattice type from the cell vectors,
with a default tolerance of tol=1e-4.
Returns a dictionary with the fundamental lattice parameters:
'A', 'B', 'C', (angstrom) 'alpha', 'beta', 'gamma' (degrees),
'cosBC', 'cosAC', 'cosAB', 'Volume cell' (AA^3),
'ibrav', 'ibrav name'.
1359def resume( 1360 folder=None, 1361 in_str:str='.in', 1362 out_str:str='.out', 1363 ) -> None: 1364 """Update an input file with the atomic coordinates of an output file. 1365 1366 This can be used to quickly resume an unfinished or interrupted calculation. 1367 1368 Note that for the moment this is only expected to work for `ibrav=0` calculations. 1369 1370 Takes a `folder` from a QE pw.x calculation, CWD if empty. 1371 Input and output files are determined automatically, 1372 but must be specified with `in_str` and `out_str` 1373 if more than one file ends with `.in` or `.out`. 1374 1375 Old input and output files will be renamed automatically. 1376 """ 1377 folder = file.get_dir(folder) 1378 exclude = ['resumed', 'slurm-'] 1379 input_file = file.get(folder, include=in_str, exclude=exclude) 1380 output_file = file.get(folder, out_str, exclude=exclude) 1381 if not input_file or not output_file: 1382 raise FileNotFoundError('Missing input or output file') 1383 # Check that ibrav == 0 1384 dict_in = read_in(input_file) 1385 if dict_in['ibrav'] != 0: # TODO: make it work for non-ibrav=0 calculations 1386 raise NotImplementedError('Currently only ibrav=0 calculations can be resumed automatically!') 1387 # Get the new values from the output file 1388 dict_out = read_out(output_file) 1389 atomic_positions = dict_out.get('ATOMIC_POSITIONS out') 1390 cell_parameters = dict_out.get('CELL_PARAMETERS out') 1391 if not atomic_positions: 1392 raise ValueError(f'Missing atomic positions in output file {output_file}') 1393 # Backup old files 1394 backup_in = file.backup(input_file, keep=True, label='resumed') 1395 backup_out = file.backup(output_file, keep=False, label='resumed') 1396 # Update input file 1397 set_value(input_file, 'ATOMIC_POSITIONS', atomic_positions) 1398 if cell_parameters: 1399 set_value(input_file, 'CELL_PARAMETERS', cell_parameters) 1400 print(f'Updated {input_file} from previous output, ready to resume!') 1401 print('Previous input and output files are backuped at:') 1402 print(f' {backup_in}') 1403 print(f' {backup_out}') 1404 return None
Update an input file with the atomic coordinates of an output file.
This can be used to quickly resume an unfinished or interrupted calculation.
Note that for the moment this is only expected to work for ibrav=0 calculations.
Takes a folder from a QE pw.x calculation, CWD if empty.
Input and output files are determined automatically,
but must be specified with in_str and out_str
if more than one file ends with .in or .out.
Old input and output files will be renamed automatically.
1407def scf_from_relax( 1408 folder:str=None, 1409 relax_in:str='relax.in', 1410 relax_out:str='relax.out', 1411 update:dict=None, 1412 indent:str=' ', 1413 ) -> None: 1414 """Create a Quantum ESPRESSO `scf.in` file from a previous relax calculation. 1415 1416 If no `folder` is provided, the current working directory is used. 1417 The `relax_in` and `relax_out` files are `relax.in` and `relax.out` by default. 1418 Update specific values (such as convergence values) with an `update` dictionaty. 1419 """ 1420 # Terminal feedback 1421 print(f'\naton.api.pwx {__version__}\n' 1422 f'Creating Quantum ESPRESSO SCF input from previous relax calculation:\n' 1423 f'{relax_in}\n{relax_out}\n') 1424 folder_path = folder 1425 if not folder_path: 1426 folder_path = os.getcwd() 1427 relax_in = file.get(folder_path, relax_in) 1428 relax_out = file.get(folder_path, relax_out) 1429 data = read_dir(folder_path, relax_in, relax_out) 1430 # Create the scf.in from the previous relax.in 1431 scf_in = os.path.join(folder_path, 'scf.in') 1432 comment = f'! Automatic SCF input made with ATON {__version__}. https://pablogila.github.io/aton' 1433 edit.from_template(relax_in, scf_in, None, comment) 1434 scf_in = file.get(folder_path, scf_in) 1435 # Replace CELL_PARAMETERS, ATOMIC_POSITIONS, ATOMIC_SPECIES, alat, ibrav and calculation 1436 atomic_species = data['ATOMIC_SPECIES'] 1437 cell_parameters = data['CELL_PARAMETERS out'] 1438 atomic_positions = data['ATOMIC_POSITIONS out'] 1439 alat = data['Alat'] 1440 calculation = data['calculation'] 1441 set_value(scf_in, 'ATOMIC_SPECIES', atomic_species) 1442 set_value(scf_in, 'ATOMIC_POSITIONS', atomic_positions) 1443 set_value(scf_in, 'ibrav', 0) 1444 set_value(scf_in, 'calculation', "'scf'") 1445 if cell_parameters and alat: 1446 set_value(scf_in, 'CELL_PARAMETERS', cell_parameters) 1447 set_value(scf_in, 'celldm(1)', alat) 1448 elif calculation == 'vc-relax': 1449 raise ValueError(f'Missing lattice parameters from {calculation} calculation, CELL_PARAMETERS={cell_parameters}, celldm(1)={alat}') 1450 # Update user-specified values 1451 set_values(filepath=scf_in, update=update, indent=indent) 1452 # Terminal feedback 1453 print(f'Created input SCF file at:' 1454 f'{scf_in}\n') 1455 return None
Create a Quantum ESPRESSO scf.in file from a previous relax calculation.
If no folder is provided, the current working directory is used.
The relax_in and relax_out files are relax.in and relax.out by default.
Update specific values (such as convergence values) with an update dictionaty.
1458def to_cartesian(filepath, coordinates) -> str: 1459 """Converts a given `cordinates` from crystal lattice vectors to cartesian. 1460 1461 Only for `ibrav=0`. Uses the cell parameters from the input `filepath`. 1462 Note that the result is not multiplied by `A` nor `celldm(1)`. 1463 """ 1464 #print(f'COORDINATES: {coordinates}') 1465 cell_base = _get_base_change_matrix(filepath) 1466 coords = _clean_coords(coordinates) 1467 coords = np.array(coords).transpose() 1468 converted_coords = np.matmul(cell_base, coords) 1469 converted_coords_row = converted_coords.transpose() 1470 final_coords = converted_coords_row.tolist() 1471 #print(f'FINAL_COORDINATES: {final_coords}') 1472 return final_coords
Converts a given cordinates from crystal lattice vectors to cartesian.
Only for ibrav=0. Uses the cell parameters from the input filepath.
Note that the result is not multiplied by A nor celldm(1).
1475def from_cartesian(filepath, coordinates:list) -> str: 1476 """Converts a given `cordinates` from cartesian to the base of lattice vectors. 1477 1478 Only for `ibrav=0`. Uses the cell parameters from the input `filepath`. 1479 Note that the result is not divided by `A` nor `celldm(1)`. 1480 """ 1481 #print(f'COORDINATES: {coordinates}') # DEBUG 1482 cell_base = _get_base_change_matrix(filepath) 1483 cell_base_inverse = np.linalg.inv(cell_base) 1484 coords = _clean_coords(coordinates) 1485 coords = np.array(coords).transpose() 1486 converted_coords = np.matmul(cell_base_inverse, coords) 1487 converted_coords_row = converted_coords.transpose() 1488 final_coords = converted_coords_row.tolist() 1489 #print(f'FINAL_COORDINATES: {final_coords}') # DEBUG 1490 return final_coords
Converts a given cordinates from cartesian to the base of lattice vectors.
Only for ibrav=0. Uses the cell parameters from the input filepath.
Note that the result is not divided by A nor celldm(1).
Dictionary with all possible NAMELISTs as keys, and the corresponding variables as values.
Dictionary with every possible CARDs as keys, and the corresponding variables as values.