Skip to content

Commit

Permalink
CHEBI:17330 (ACP as R group) fix and Reaxys generics (A,M,X) added to…
Browse files Browse the repository at this point in the history
… molformula
  • Loading branch information
Eloy Felix committed Feb 16, 2024
1 parent 1052bfb commit 9117ba8
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 27 deletions.
25 changes: 19 additions & 6 deletions libRDChEBI/depiction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,35 @@ def depict(
explicitMethyl=True,
scaleBondWidth=False,
addStereoAnnotation=True,
useMolBlockWedging=True,
):
mol = parse_molblock(molfile)
if not mol:
return None
# parse_molblock only re-applies dash and wedge bonds
# but not the wiggly ones
Chem.ReapplyMolBlockWedging(mol)

# ChEBI doesn't like to show '#'
# nor superindices in numbered R groups
sgs_single_atom = []
for sg in Chem.GetMolSubstanceGroups(mol):
sg_props = sg.GetPropsAsDict()
if sg_props["TYPE"] != "SUP":
continue
sg_atoms = list(sg.GetAtoms())
if len(sg.GetAtoms()) == 1:
sgs_single_atom.append([sg_atoms, sg_props["LABEL"]])

for at in mol.GetAtoms():
dlabel = at.GetSymbol()
if len(dlabel) > 1 and dlabel[0] == "R":
# ChEBI doesn't like to show '#'
# nor superscripts in numbered R groups
if at.GetAtomicNum() == 0 and len(dlabel) > 1 and dlabel[0] == "R":
if dlabel[1] == "#":
at.SetProp("_displayLabel", "R")
else:
at.SetProp("_displayLabel", f"R{dlabel[1:]}")
# add sgroup label if the R group is the only
# member of a SUP SGROUP
for sg in sgs_single_atom:
if at.GetIdx() in sg[0]:
at.SetProp("_displayLabel", sg[1])

draw = rdMolDraw2D.MolDraw2DSVG(width, height)
draw_options = draw.drawOptions()
Expand All @@ -43,6 +55,7 @@ def depict(
draw_options.explicitMethyl = explicitMethyl
draw_options.scaleBondWidth = scaleBondWidth
draw_options.addStereoAnnotation = addStereoAnnotation
draw_options.useMolBlockWedging = useMolBlockWedging
draw.DrawMolecule(mol)
draw.FinishDrawing()
svg = draw.GetDrawingText()
Expand Down
9 changes: 7 additions & 2 deletions libRDChEBI/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ def _get_frag_formula(mol):
atoms_dict = _create_or_add_one(atoms_dict, "T")
else:
hs += 1
# capture Reaxys generics
elif at.GetSymbol() == "*" and at.GetQueryType():
atoms_dict = _create_or_add_one(atoms_dict, at.GetQueryType())
else:
atoms_dict = _create_or_add_one(atoms_dict, at.GetSymbol())
hs += at.GetTotalNumHs(includeNeighbors=False)
Expand Down Expand Up @@ -142,7 +145,7 @@ def _get_frag_formula(mol):
[pse.GetElementSymbol(i) for i in range(1, 119)],
)
)
elements_list = ["C", "H"] + sorted(els + ["D", "T"])
elements_list = ["C", "H"] + sorted(els + ["D", "T"]) + ["A", "M", "X"]

molecular_formula = ""
for elem in elements_list:
Expand All @@ -163,7 +166,9 @@ def get_small_molecule_formula(molfile):
mol = update_mol_valences(mol)
frags = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)
formulas = [_get_frag_formula(frag) for frag in frags]
return ".".join(formulas)
# a disconnected dummy atom would generate '' as a formula;
# we don't want to concatenate that
return ".".join(filter(None, formulas))


def get_avg_mass(molfile):
Expand Down
18 changes: 0 additions & 18 deletions libRDChEBI/test/mols.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,6 @@
},
}

# Multiple R-Group mols (52595, 77272)
m_r_groups = {
52595: {
"molfile": "\n Marvin 06170915082D \n\n 10 9 0 0 0 0 999 V2000\n -3.4915 0.0159 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -4.2059 -0.3966 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -6.3493 0.0159 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.0638 -0.3966 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -5.6349 -0.3966 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0\n -4.9204 0.0159 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.6348 -1.2216 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -4.9203 -1.6340 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.9202 -2.4590 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -4.2059 -1.2214 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 4 3 1 0 0 0 0\n 5 3 1 0 0 0 0\n 5 7 1 1 0 0 0\n 5 6 1 0 0 0 0\n 2 6 1 0 0 0 0\n 7 8 1 0 0 0 0\n 8 9 2 0 0 0 0\n 8 10 1 0 0 0 0\nM RGP 1 1 1\nM END\n",
"mol_formula": "C4H6O4R2",
"avg_mass": 118.088,
"monoisotopic_mass": 118.02661,
"net_charge": 0,
},
77272: {
"molfile": "\n Mrv0541 08131417202D \n\n 12 11 0 0 0 0 999 V2000\n -3.3295 1.8562 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -2.6150 2.2687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.9005 1.8562 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1861 2.2687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.4716 1.8562 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0\n 0.2429 2.2687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.9574 1.8562 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.6718 2.2687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.3863 1.8562 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -0.4716 1.0312 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -2.6150 3.0937 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.6718 3.0937 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 2 3 1 0 0 0 0\n 3 4 1 0 0 0 0\n 4 5 1 0 0 0 0\n 5 6 1 0 0 0 0\n 6 7 1 0 0 0 0\n 7 8 1 0 0 0 0\n 8 9 1 0 0 0 0\n 5 10 1 6 0 0 0\n 2 11 2 0 0 0 0\n 8 12 2 0 0 0 0\nM RGP 2 1 1 9 2\nM END\n",
"mol_formula": "C5H6O5R2",
"avg_mass": 146.098,
"monoisotopic_mass": 146.02152,
"net_charge": 0,
},
}

# group (*), single point of attachment
single_star = {
47265: {
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
if __name__ == "__main__":
setup(
name="libRDChEBI",
version="0.2.3",
version="0.2.4",
author="Eloy Félix",
author_email="[email protected]",
description="RDKit library to deal with ChEBI's chemistry",
Expand Down

0 comments on commit 9117ba8

Please sign in to comment.