7.4. Example 04: Building Systems from SMILES
This example demonstrates how to use the ligandparam package to generate pdb files from SMILES strings, and use them to parametrize ligands. This example will use the FreeLigand class to parametrize a series of ligands from a SMILES strings in parallel. This example will also demonstrate how to BUILD these ligands into starting structures for Molecular Dyanmics simulations.
7.4.1. Learning Outcomes:
Learn how to generate pdb files from SMILES strings, and use them to parametrize ligands.
Demonstrate automation of batches of ligand parametrizations using python scripting.
Demonstrate how to build parm7 and rst7 files for Molecular Dynamics simulations.
7.4.2. Files
The files for this example can be found in the LigandParameterization/examples/04_FromSmiles directory of the source code.
7.4.3. Tutorial
As with previous examples, we need to import the necessary modules and classes from the ligandparam package.
import logging
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from ligandparam import __version__
from ligandparam.recipes import LazierLigand
from ligandparam.stages import StageSmilesToPDB
To work in parallel, we will define a worker and a logger for each task.
def set_file_logger(
logfilename: Path, logname: str = None, filemode: str = "a"
) -> logging.Logger:
if logname is None:
logname = Path(logfilename).stem
logger = logging.getLogger(logname)
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
"{asctime} - {levelname} - {version} {message}",
style="{",
datefmt="%Y-%m-%d %H:%M:%S",
defaults={"version": __version__},
)
file_handler = logging.FileHandler(filename=logfilename, mode=filemode)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
return logger
def worker(mol: str, resname: str, data_cwd: Path, reference_pdb: Path) -> Path:
binder_dir = data_cwd / resname
binder_dir.mkdir(exist_ok=True)
binder_pdb = binder_dir / f"{resname}.pdb"
logger = set_file_logger(binder_dir / f"{resname}.log", filemode="w")
prestage = StageSmilesToPDB(f"build_{resname}", mol, binder_dir, out_pdb=binder_pdb, resname=resname,
reference_pdb=reference_pdb, align=True, logger=logger)
recipe = LazierLigand(
in_filename=binder_pdb,
cwd=binder_dir,
atom_type="gaff2",
charge_model="bcc",
logger=logger,
molname=resname,
net_charge=0,
)
recipe.setup()
recipe.insert_stage(prestage, "Initialize")
recipe.execute()
return binder_pdb
# Here is an initial set of molecules
Next, we define a set of moleucles and their corresponding SMILES strings to a dictionary called example_set. Likewise, we define the force-field parameters to be used in the calculation by adding them to a leaprc list. We also define the reference structure and residue name IN THE TARGET PDB.
example_set = {
"F3G": "O=C1NC(C(F)(F)F)=NC2=C1N=CN2",
"NOG": "O=C1NC(NOC)=NC2=C1N=CN2",
"DOG": "O=C1NC(NC(C)=O)=NC2=C1N=CN2",
"NNG": "O=C1NC(NNC)=NC2=C1N=CN2",
"ORG": "O=C1NC(NOCC2=CC=CC=C2)=NC3=C1N=CN3",
"LIG": "O=C1NC(NNC2=CC=CC=C2)=NC3=C1N=CN3"
Lastly, we run the calculations.
cwd = Path(sys.argv[0]).resolve().parent
reference_pdb = cwd / "guanine.pdb"
with ProcessPoolExecutor(max_workers=6) as ex:
futuros = {}
for resname, mol in example_set.items():
futu = ex.submit(worker, mol, resname, cwd, reference_pdb)
# Make sure `resname`s are unique!
futuros[resname] = futu
for node, futu in futuros.items():
try:
futu.result()
print(f"Got {node}")
except Exception as e:
print(f"Failed {node}: {e}")
continue
7.4.4. Full code
import logging
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from ligandparam import __version__
from ligandparam.recipes import LazierLigand
from ligandparam.stages import StageSmilesToPDB
"""
In this example, we're going to parametrize 6 ligands in parallel, starting from their SMILES strings.
We are also going to set up our own logger, and pass it to the LazyLigand class.
"""
def set_file_logger(
logfilename: Path, logname: str = None, filemode: str = "a"
) -> logging.Logger:
if logname is None:
logname = Path(logfilename).stem
logger = logging.getLogger(logname)
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
"{asctime} - {levelname} - {version} {message}",
style="{",
datefmt="%Y-%m-%d %H:%M:%S",
defaults={"version": __version__},
)
file_handler = logging.FileHandler(filename=logfilename, mode=filemode)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
return logger
def worker(mol: str, resname: str, data_cwd: Path, reference_pdb: Path) -> Path:
binder_dir = data_cwd / resname
binder_dir.mkdir(exist_ok=True)
binder_pdb = binder_dir / f"{resname}.pdb"
logger = set_file_logger(binder_dir / f"{resname}.log", filemode="w")
prestage = StageSmilesToPDB(f"build_{resname}", mol, binder_dir, out_pdb=binder_pdb, resname=resname,
reference_pdb=reference_pdb, align=True, logger=logger)
recipe = LazierLigand(
in_filename=binder_pdb,
cwd=binder_dir,
atom_type="gaff2",
charge_model="bcc",
logger=logger,
molname=resname,
net_charge=0,
)
recipe.setup()
recipe.insert_stage(prestage, "Initialize")
recipe.execute()
return binder_pdb
# Here is an initial set of molecules
example_set = {
"F3G": "O=C1NC(C(F)(F)F)=NC2=C1N=CN2",
"NOG": "O=C1NC(NOC)=NC2=C1N=CN2",
"DOG": "O=C1NC(NC(C)=O)=NC2=C1N=CN2",
"NNG": "O=C1NC(NNC)=NC2=C1N=CN2",
"ORG": "O=C1NC(NOCC2=CC=CC=C2)=NC3=C1N=CN3",
"LIG": "O=C1NC(NNC2=CC=CC=C2)=NC3=C1N=CN3"
}
cwd = Path(sys.argv[0]).resolve().parent
reference_pdb = cwd / "guanine.pdb"
with ProcessPoolExecutor(max_workers=6) as ex:
futuros = {}
for resname, mol in example_set.items():
futu = ex.submit(worker, mol, resname, cwd, reference_pdb)
# Make sure `resname`s are unique!
futuros[resname] = futu
for node, futu in futuros.items():
try:
futu.result()
print(f"Got {node}")
except Exception as e:
print(f"Failed {node}: {e}")
continue