Source code for app.modules.npscorer
from __future__ import annotations
import gzip
import math
import os
import pickle
import pystow
from rdkit.Chem import rdMolDescriptors
# Set path: directory that pystow resolves for the "NP_model" key
default_path = pystow.join("NP_model")
# Model download location
model_url = "https://github.com/rdkit/rdkit/blob/master/Contrib/NP_Score/publicnp.model.gz?raw=true"
model_path = str(default_path) + "/publicnp.model.gz"
# Download the model to the default location if it is not already present
if not os.path.exists(model_path):
    pystow.ensure("NP_model", url=model_url)
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# The model is fetched from the URL above, so that source must be trusted.
# The context manager guarantees the gzip handle is closed after loading.
with gzip.open(model_path) as _model_file:
    fscore = pickle.load(_model_file)
[docs]
def get_np_model(model_path) -> dict:
    """Load the NP model from a gzip-compressed pickle file.

    Parameters:
        model_path (str | os.PathLike): Path to the gzipped, pickled model
            file. Both plain strings and path-like objects are accepted.

    Returns:
        dict: The unpickled NP model.

    Raises:
        TypeError: If ``model_path`` is neither a string nor path-like.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    # os.fspath handles str and pathlib.Path alike; the previous
    # ``model_path.as_posix()`` call failed for the documented ``str`` input.
    path = os.fspath(model_path)
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with gzip.open(path) as model_file:
        return pickle.load(model_file)
[docs]
def score_mol_with_confidence(molecule) -> dict:
    """Calculate NP-likeness score and confidence for a molecule.

    The score is the sum of the model contributions (module-level ``fscore``)
    of every distinct radius-2 Morgan fingerprint element of the molecule that
    is present in the model, divided by the number of atoms. Values beyond
    +/-4 are compressed logarithmically.

    Args:
        molecule (rdkit.Chem.rdchem.Mol): The input molecule.

    Returns:
        dict: A dictionary containing NP-likeness score and confidence.
            - 'nplikeness' (float): The NP-likeness score.
            - 'confidence' (float): Fraction of the molecule's fingerprint
              elements that were found in the model.
            Both values are 0.0 for a molecule with no atoms or no
            fingerprint elements.

    Raises:
        ValueError: If ``molecule`` is None.
    """
    if molecule is None:
        raise ValueError("Invalid molecule")

    fp = rdMolDescriptors.GetMorganFingerprint(molecule, 2)
    bits = fp.GetNonzeroElements()
    num_atoms = molecule.GetNumAtoms()

    # An empty molecule has no atoms and no fingerprint elements; both
    # divisions below would raise ZeroDivisionError, so report "no evidence".
    if num_atoms == 0 or not bits:
        return {"nplikeness": 0.0, "confidence": 0.0}

    # Calculating the score: each distinct element contributes once,
    # regardless of how often it occurs in the molecule.
    score = 0.0
    bits_found = 0
    for bit in bits:
        if bit in fscore:
            bits_found += 1
            score += fscore[bit]
    score /= float(num_atoms)
    confidence = bits_found / len(bits)

    # Preventing score explosion for exotic molecules
    if score > 4:
        score = 4.0 + math.log10(score - 4.0 + 1.0)
    elif score < -4:
        score = -4.0 - math.log10(-4.0 - score + 1.0)

    return {"nplikeness": score, "confidence": confidence}
[docs]
def score_mol(molecule) -> float:
    """Return only the NP-likeness score of a molecule.

    Convenience wrapper around ``score_mol_with_confidence`` that discards
    the confidence value.

    Parameters:
        molecule (rdkit.Chem.Mol): RDKit molecule object.

    Returns:
        float: The 'nplikeness' entry computed by
        ``score_mol_with_confidence``.
    """
    return score_mol_with_confidence(molecule)["nplikeness"]
[docs]
def get_np_score(molecule: object) -> str:
    """Return the NP-likeness score of a molecule as a formatted string.

    Parameters:
        molecule (Chem.Mol): RDKit molecule object. Any falsy value
            (for example ``None``) is treated as an invalid molecule.

    Returns:
        str: NP score formatted with two decimal places, or "invalid" if
        no usable molecule was supplied.
    """
    # Guard clause: falsy input is reported as "invalid" and never scored.
    if not molecule:
        return "invalid"
    return f"{score_mol(molecule):.2f}"