Source code for a3fe.run.system_prep

"""Functionality for running preparation simulations."""

__all__ = [
    "parameterise_input",
    "solvate_input",
    "minimise_input",
    "heat_and_preequil_input",
    "run_ensemble_equilibration",
]

import pathlib as _pathlib
import warnings as _warnings
from typing import Optional as _Optional

import BioSimSpace.Sandpit.Exscientia as _BSS

from ..configuration import EngineType as _EngineType
from ..configuration import LegType as _LegType
from ..configuration import PreparationStage as _PreparationStage
from ..read._process_bss_systems import rename_lig as _rename_lig
from ._utils import check_has_wat_and_box as _check_has_wat_and_box


[docs]def parameterise_input( leg_type: _LegType, engine_type: _EngineType, input_dir: str, output_dir: str, ) -> _BSS._SireWrappers._system.System: # type: ignore """ Paramaterise the input structure, using Open Force Field v.2.0 'Sage' for the ligand, AMBER ff14SB for the protein, and TIP3P for the water. The resulting system is saved to the input directory. Parameters ---------- leg_type : LegType The type of the leg. engine_type: EngineType The type of the engine to use for production (GROMACS is always used for preparation.) input_dir : str The path to the input directory, where the required files are located. output_dir : str The path to the output directory, where the files will be saved. Returns ------- parameterised_system : _BSS._SireWrappers._system.System Parameterised system. """ cfg = engine_type.system_prep_config.load(input_dir, leg_type) print("Parameterising input...") # Parameterise the ligand print("Parameterising ligand...") lig_sys = _BSS.IO.readMolecules(f"{input_dir}/ligand.sdf") # Ensure that the ligand is named "LIG" _rename_lig(lig_sys, "LIG") # Check charge of the ligand lig = lig_sys[0] lig_charge = round(lig.charge().value()) if lig_charge != 0: _warnings.warn( f"Ligand has a charge of {lig_charge}. Co-alchemical ion approach will be used." " Ensure that your box is large enough to avoid artefacts." ) # Only include ligand charge if we're using gaff (OpenFF doesn't need it) param_args = {"molecule": lig, "forcefield": cfg.forcefields["ligand"]} if "gaff" in cfg.forcefields["ligand"]: param_args["net_charge"] = lig_charge param_lig = _BSS.Parameters.parameterise(**param_args).getMolecule() # If bound, then parameterise the protein and waters and add to the system if leg_type == _LegType.BOUND: # Parameterise the protein print("Parameterising protein...") protein = _BSS.IO.readMolecules(f"{input_dir}/protein.pdb")[0] param_protein = _BSS.Parameters.parameterise( molecule=protein, forcefield=cfg.forcefields["protein"] ).getMolecule() # Parameterise the waters, if they are supplied # Check that waters are supplied param_waters = [] if _pathlib.Path(f"{input_dir}/waters.pdb").exists(): print("Crystallographic waters detected. Parameterising...") waters = _BSS.IO.readMolecules(f"{input_dir}/waters.pdb") for water in waters: param_waters.append( _BSS.Parameters.parameterise( molecule=water, water_model=cfg.forcefields["water"], forcefield=cfg.forcefields["protein"], ).getMolecule() ) # Create the system print("Assembling parameterised system...") parameterised_system = param_lig + param_protein for water in param_waters: parameterised_system += water # This is the free leg, so just turn the ligand into a system else: parameterised_system = param_lig.toSystem() # Save the system print("Saving parameterised system...") _BSS.IO.saveMolecules( f"{output_dir}/{leg_type.name.lower()}{_PreparationStage.PARAMETERISED.file_suffix}", parameterised_system, fileformat=["prm7", "rst7"], ) return parameterised_system
[docs]def solvate_input( leg_type: _LegType, engine_type: _EngineType, input_dir: str, output_dir: str, ) -> _BSS._SireWrappers._system.System: # type: ignore """ Determine an appropriate (rhombic dodecahedron) box size, then solvate the input structure using TIP3P water, adding 150 mM NaCl to the system. The resulting system is saved to the input directory. Parameters ---------- leg_type : LegType The type of the leg. engine_type: EngineType The type of the engine to use for production (GROMACS is always used for preparation.) input_dir : str The path to the input directory, where the required files are located. output_dir : str The path to the output directory, where the files will be saved. Returns ------- solvated_system : _BSS._SireWrappers._system.System Solvated system. """ cfg = engine_type.system_prep_config.load(input_dir, leg_type) # Load the parameterised system print("Loading parameterised system...") parameterised_system = _BSS.IO.readMolecules( [ f"{input_dir}/{file}" for file in _PreparationStage.PARAMETERISED.get_simulation_input_files( leg_type ) ] ) # Determine the box size # Taken from https://github.com/michellab/BioSimSpaceTutorials/blob/main/01_introduction/02_molecular_setup.ipynb # Get the minimium and maximum coordinates of the bounding box that # minimally encloses the protein. print("Determining optimal rhombic dodecahedral box...") # Want to get box size based on complex/ ligand, exlcuding any crystallographic waters non_waters = [mol for mol in parameterised_system if mol.nAtoms() != 3] # type: ignore dry_system = _BSS._SireWrappers._system.System(non_waters) # type: ignore box_min, box_max = dry_system.getAxisAlignedBoundingBox() # Work out the box size from the difference in the coordinates. box_size = [y - x for x, y in zip(box_min, box_max)] # Add 15 A padding to the box size in each dimension. padding = 15 * _BSS.Units.Length.angstrom # Work out an appropriate box. This will used in each dimension to ensure # that the cutoff constraints are satisfied if the molecule rotates. box_length = max(box_size) + 2 * padding box, angles = _BSS.Box.rhombicDodecahedronHexagon(box_length) # Exclude waters if they are too far from the protein. These are unlikely # to be important for the simulation and including them would require a larger # box. Exclude if further than 10 A from the protein. try: waters_to_exclude = [ wat for wat in parameterised_system.search( "water and not (water within 10 of protein)" ).molecules() ] # If we have failed to convert to molecules (old BSS bug), then do this for each molecule. if hasattr(waters_to_exclude[0], "toMolecule"): waters_to_exclude = [wat.toMolecule() for wat in waters_to_exclude] print( f"Excluding {len(waters_to_exclude)} waters that are over 10 A from the protein" ) except ValueError: waters_to_exclude = [] parameterised_system.removeMolecules(waters_to_exclude) print(f"Solvating system with {cfg.water_model} water and {cfg.ion_conc} M NaCl...") solvated_system = _BSS.Solvent.solvate( model=cfg.water_model, molecule=parameterised_system, box=box, angles=angles, ion_conc=cfg.ion_conc, ) # Save the system print("Saving solvated system") _BSS.IO.saveMolecules( f"{output_dir}/{leg_type.name.lower()}{_PreparationStage.SOLVATED.file_suffix}", solvated_system, fileformat=["prm7", "rst7"], ) return solvated_system
[docs]def minimise_input( leg_type: _LegType, engine_type: _EngineType, input_dir: str, output_dir: str, ) -> _BSS._SireWrappers._system.System: # type: ignore """ Minimise the input structure with GROMACS. Parameters ---------- leg_type : LegType The type of the leg. engine_type: EngineType The type of the engine to use for production (GROMACS is always used for preparation.) input_dir : str The path to the input directory, where the required files are located. output_dir : str The path to the output directory, where the files will be saved. Returns ------- minimised_system : _BSS._SireWrappers._system.System Minimised system. """ cfg = engine_type.system_prep_config.load(input_dir, leg_type) # Load the solvated system print("Loading solvated system...") solvated_system = _BSS.IO.readMolecules( [ f"{input_dir}/{file}" for file in _PreparationStage.SOLVATED.get_simulation_input_files(leg_type) ] ) # Check that it is actually solvated in a box of water _check_has_wat_and_box(solvated_system) # Minimise print(f"Minimising input structure with {cfg.steps} steps...") protocol = _BSS.Protocol.Minimisation(steps=cfg.steps) minimised_system = run_process(solvated_system, protocol) # Save, renaming the velocity property to foo so avoid saving velocities. Saving the # velocities sometimes causes issues with the size of the floats overflowing the RST7 # format. _BSS.IO.saveMolecules( f"{output_dir}/{leg_type.name.lower()}{_PreparationStage.MINIMISED.file_suffix}", minimised_system, fileformat=["prm7", "rst7"], property_map={"velocity": "foo"}, ) return minimised_system
[docs]def heat_and_preequil_input( leg_type: _LegType, engine_type: _EngineType, input_dir: str, output_dir: str, ) -> _BSS._SireWrappers._system.System: # type: ignore """ Heat the input structure from 0 to 298.15 K with GROMACS. Parameters ---------- leg_type : LegType The type of the leg. engine_type: EngineType The type of the engine to use for production (GROMACS is always used for preparation.) input_dir : str The path to the input directory, where the required files are located. output_dir : str The path to the output directory, where the files will be saved. Returns ------- preequilibrated_system : _BSS._SireWrappers._system.System Pre-Equilibrated system. """ cfg = engine_type.system_prep_config.load(input_dir, leg_type) # Load the minimised system print("Loading minimised system...") minimised_system = _BSS.IO.readMolecules( [ f"{input_dir}/{file}" for file in _PreparationStage.MINIMISED.get_simulation_input_files(leg_type) ] ) # Check that it is solvated and has a box _check_has_wat_and_box(minimised_system) print( f"NVT equilibration for {cfg.runtime_short_nvt} ps while restraining all non-solvent atoms" ) protocol = _BSS.Protocol.Equilibration( runtime=cfg.runtime_short_nvt * _BSS.Units.Time.picosecond, temperature_start=0 * _BSS.Units.Temperature.kelvin, temperature_end=cfg.end_temp * _BSS.Units.Temperature.kelvin, restraint="all", ) equil1 = run_process(minimised_system, protocol) # If this is the bound leg, carry out step with backbone restraints if leg_type == _LegType.BOUND: print( f"NVT equilibration for {cfg.runtime_nvt} ps while restraining all backbone atoms" ) protocol = _BSS.Protocol.Equilibration( runtime=cfg.runtime_nvt * _BSS.Units.Time.picosecond, temperature=cfg.end_temp * _BSS.Units.Temperature.kelvin, restraint="backbone", ) equil2 = run_process(equil1, protocol) else: # Free leg - skip the backbone restraint step equil2 = equil1 print(f"NVT equilibration for {cfg.runtime_nvt} ps without restraints") protocol = _BSS.Protocol.Equilibration( runtime=cfg.runtime_nvt * _BSS.Units.Time.picosecond, temperature=cfg.end_temp * _BSS.Units.Temperature.kelvin, ) equil3 = run_process(equil2, protocol) print( f"NPT equilibration for {cfg.runtime_npt} ps while restraining non-solvent heavy atoms" ) protocol = _BSS.Protocol.Equilibration( runtime=cfg.runtime_npt * _BSS.Units.Time.picosecond, pressure=1 * _BSS.Units.Pressure.atm, temperature=cfg.end_temp * _BSS.Units.Temperature.kelvin, restraint="heavy", ) equil4 = run_process(equil3, protocol) print(f"NPT equilibration for {cfg.runtime_npt_unrestrained} ps without restraints") protocol = _BSS.Protocol.Equilibration( runtime=cfg.runtime_npt_unrestrained * _BSS.Units.Time.picosecond, pressure=1 * _BSS.Units.Pressure.atm, temperature=cfg.end_temp * _BSS.Units.Temperature.kelvin, ) preequilibrated_system = run_process(equil4, protocol) # Save, renaming the velocity property to foo so avoid saving velocities. Saving the # velocities sometimes causes issues with the size of the floats overflowing the RST7 # format. _BSS.IO.saveMolecules( f"{output_dir}/{leg_type.name.lower()}{_PreparationStage.PREEQUILIBRATED.file_suffix}", preequilibrated_system, fileformat=["prm7", "rst7"], property_map={"velocity": "foo"}, ) return preequilibrated_system
[docs]def run_ensemble_equilibration( leg_type: _LegType, engine_type: _EngineType, input_dir: str, output_dir: str, ) -> None: """ Run the ensemble equilibration for the given leg type. Parameters ---------- leg_type : LegType The type of the leg. engine_type: EngineType The type of the engine to use for production (GROMACS is always used for preparation.) input_dir : str The path to the input directory, where the required files are located. output_dir : str The path to the output directory, where the files will be saved. Returns ------- None """ cfg = engine_type.system_prep_config.load(input_dir, leg_type) # Load the pre-equilibrated system print("Loading pre-equilibrated system...") pre_equilibrated_system = _BSS.IO.readMolecules( [ f"{input_dir}/{file}" for file in _PreparationStage.PREEQUILIBRATED.get_simulation_input_files( leg_type ) ] ) # Check that it is solvated in a box of water _check_has_wat_and_box(pre_equilibrated_system) # Mark the ligand to be decoupled in the absolute binding free energy calculation lig = _BSS.Align.decouple(pre_equilibrated_system[0], intramol=True) # Check that this is actually a ligand if lig.nAtoms() > 100 or lig.nAtoms() < 5: raise ValueError( f"The first molecule in the bound system has {lig.nAtoms()} atoms and is likely not a ligand. " "Please check that the ligand is the first molecule in the bound system." ) # Check that the name is correct if lig._sire_object.name().value() != "LIG": raise ValueError( f"The name of the ligand in the bound system is {lig._sire_object.name().value()} and is not LIG. " "Please check that the ligand is the first molecule in the bound system or rename the ligand." ) print(f"Selecting ligand {lig} for decoupling") # Update the system pre_equilibrated_system.updateMolecule(0, lig) # Create the protocol protocol = _BSS.Protocol.Production( timestep=2 * _BSS.Units.Time.femtosecond, # 2 fs timestep as 4 fs seems to cause instability even with HMR runtime=cfg.ensemble_equilibration_time * _BSS.Units.Time.picosecond, ) # Run - assuming that this will be in the appropriate ensemble equilibration directory print( f"Running ensemble equilibration simulation with GROMACS for {cfg.ensemble_equilibration_time} ps" ) if leg_type == _LegType.BOUND: work_dir = output_dir else: work_dir = None final_system = run_process(pre_equilibrated_system, protocol, work_dir=work_dir) # Save the coordinates only, renaming the velocity property to foo so avoid saving velocities. Saving the # velocities sometimes causes issues with the size of the floats overflowing the RST7 # format. print(f"Saving somd.rst7 to {output_dir}") _BSS.IO.saveMolecules( f"{output_dir}/somd", final_system, fileformat=["rst7"], property_map={"velocity": "foo"}, )
def run_process( system: _BSS._SireWrappers._system.System, protocol: _BSS.Protocol._protocol.Protocol, work_dir: _Optional[str] = None, ) -> _BSS._SireWrappers._system.System: """ Run a process with GROMACS, raising informative errors in the event of a failure. Parameters ---------- system : _BSS._SireWrappers._system.System System to run the process on. protocol : _BSS._Protocol._protocol.Protocol Protocol to run the process with. work_dir : str, optional The working directory to run the process in. If none, a temporary directory will be created. Returns ------- system : _BSS._SireWrappers._system.System System after the process has been run. """ process = _BSS.Process.Gromacs(system, protocol, work_dir=work_dir) process.start() process.wait() import time time.sleep(10) if process.isError(): print(process.stdout()) print(process.stderr()) process.getStdout() process.getStderr() raise _BSS._Exceptions.ThirdPartyError("The process failed.") system = process.getSystem(block=True) if system is None: print(process.stdout()) print(process.stderr()) process.getStdout() process.getStderr() raise _BSS._Exceptions.ThirdPartyError("The final system is None.") return system