Source code for a3fe.analyse.waters

"""Functions to analyse binding site waters."""

__all__ = [
    "get_av_waters_simulation",
    "get_av_waters_lambda_window",
    "get_av_waters_stage",
]

import glob as _glob
import os as _os
from multiprocessing import Pool as _Pool
from typing import Callable as _Callable
from typing import List as _List
from typing import Optional as _Optional

import MDAnalysis as _mda
import numpy as _np


[docs]def get_av_waters_simulation( input_dir: str, percent_traj: float, index: int, length: float, index2: _Optional[int] = None, length2: _Optional[float] = None, ) -> float: """ Calculate average number of waters within given distance of an atom (or two atoms) with given index over the specified percentage of the end of the trajectory. Parameters ---------- input_dir: str Absolute path to the input directory containing the trajectory information and the topology files. percent_traj : float percentage of trajectory (beginning from end) over which to average index : int Atom from which distance is calculated length : float Distance in Angstrom index2 : int, optional, default=None Optional. Index of second atom from which water must be within a specified distance length2 : float, optional, default=None Optional. Distance (Angstrom) from second atom which water must be within Returns ------- avg_close_waters : float Average number of waters within the specified distance(s) of the specified atom(s) """ # Read all the trajectories into a single MDA universe traj_files = _glob.glob(_os.path.join(input_dir, "*.dcd")) top_file = _os.path.join(input_dir, "somd.parm7") # Create a soft link to the topology file named parm7 so that MDAnalysis can read it # Only create the symlink if it doesn't already exist if not _os.path.exists(top_file): _os.symlink(_os.path.join(input_dir, "somd.prm7"), top_file) no_close_waters = [] selection_str = ( f"resname WAT and sphzone {length} index {index}" if index2 is None else f"resname WAT and (sphzone {length} index {index}) and (sphzone {length2} index {index2})" ) # Can't load all trajectories at once with dcd trajectories, so iterate through for traj_file in traj_files: u = _mda.Universe(top_file, traj_file) for frame in u.trajectory: no_close_waters.append( len(u.select_atoms(selection_str)) / 3 ) # /3 as returns all atoms in water n_frames = len(no_close_waters) first_frame = round( n_frames - ((percent_traj / 100) * n_frames) ) # Index of first frame to be used for analysis avg_close_waters = _np.mean(no_close_waters[first_frame:]) return avg_close_waters
[docs]def get_av_waters_lambda_window( simulations: _List["Simulation"], # noqa: F821 percent_traj: float, lam_val: float, index: int, length: float, run_nos: _List[int], index2: _Optional[int] = None, length2: _Optional[float] = None, print_fn: _Callable[[str], None] = print, ) -> _np.ndarray: """ Calculate average number of waters within given distance of an atom (or two atoms) with given index over the specified percentage of the end of the trajectory, for all simulations for all runs for all lambda windows supplied. Parameters ---------- simulations : List[LamWindow] List of Simulatino objects for which to calculate average number of waters percent_traj : float percentage of trajectory (beginning from end) over which to average lam_val : float Current value of lambda. Used for printing output. index : int Atom from which distance is calculated length : float Distance in Angstrom run_nos : List[int] List of run numbers to include in the analysis. index2 : int, optional, default=None Optional. Index of second atom from which water must be within a specified distance length2 : float, optional, default=None Optional. Distance (Angstrom) from second atom which water must be within print_fn : Optional[Callable[[str], None]], default=print Optional. Function to use for printing output. Returns ------- avg_close_waters: _np.ndarray Average number of waters within the specified distance(s) of the specified atom(s) for each lambda window for each run. Shape is (n_runs). """ run_nos = simulations[0]._get_valid_run_nos(run_nos) n_runs = len(run_nos) avg_close_waters = _np.full(n_runs, _np.nan) for i, run_no in enumerate(run_nos): print_fn( f"Calculating average number of waters for run {i + 1} of {n_runs} for lambda window {lam_val}" ) sim = simulations[run_no - 1] avg_close_waters[i] = get_av_waters_simulation( sim.output_dir, percent_traj, index, length, index2, length2, ) return avg_close_waters
[docs]def get_av_waters_stage( lam_windows: _List["LamWindow"], # noqa: F821 percent_traj: float, index: int, length: float, index2: _Optional[int] = None, length2: _Optional[float] = None, run_nos: _Optional[_List[int]] = None, print_fn: _Optional[_Callable[[str], None]] = print, ) -> _np.ndarray: """ Calculate average number of waters within given distance of an atom (or two atoms) with given index over the specified percentage of the end of the trajectory, for all simulations for all runs for all lambda windows supplied. Parameters ---------- lam_windows : List[LamWindow] List of LamWindow objects for which to calculate average number of waters percent_traj : float percentage of trajectory (beginning from end) over which to average index : int Atom from which distance is calculated length : float Distance in Angstrom index2 : int, optional, default=None Optional. Index of second atom from which water must be within a specified distance length2 : float, optional, default=None Optional. Distance (Angstrom) from second atom which water must be within run_nos : Optional[List[int]], default=None Optional. List of run numbers to include in the analysis. If None, all runs will be included. print_fn : Optional[Callable[[str], None]], default=print Optional. Function to use for printing output. Returns ------- avg_close_waters: _np.ndarray Average number of waters within the specified distance(s) of the specified atom(s) for each lambda window for each run. Shape is (n_runs, n_lam_windows). """ run_nos: _List["LamWindow"] = lam_windows[0]._get_valid_run_nos( # noqa: F821 run_nos ) # noqa: F821 # Fill the array with Nans to start with avg_close_waters = _np.full((len(run_nos), len(lam_windows)), _np.nan) # Process the lambda windows in parallel with _Pool() as pool: per_lam_results = pool.starmap( get_av_waters_lambda_window, [ ( lam_window.sims, percent_traj, lam_window.lam, index, length, run_nos, index2, length2, print_fn, ) for lam_window in lam_windows ], ) # Unpack the results into the array for i in range(len(lam_windows)): for j in range(len(run_nos)): avg_close_waters[j, i] = per_lam_results[i][j] return avg_close_waters