Source code for simi_search.search

"""OOP ligand similarity search implementations."""

from __future__ import annotations

from collections.abc import Sequence

from simi_search.fingerprints import Fingerprinter, HashedSmilesFingerprint
from simi_search.models import Molecule


[docs] class TanimotoSimilarity: """Tanimoto similarity over integer bitset fingerprints."""
[docs] def score(self, left: int, right: int) -> float: if not left and not right: return 0.0 intersection = (left & right).bit_count() union = (left | right).bit_count() return intersection / union if union else 0.0
[docs] class MaxActiveSimilaritySearch: """Score candidates by maximum similarity to active reference ligands.""" def __init__( self, *, fingerprinter: Fingerprinter | None = None, method_name: str = "hashed_smiles", similarity: TanimotoSimilarity | None = None, ) -> None: self.fingerprinter = fingerprinter or HashedSmilesFingerprint() self.method_name = method_name self.similarity = similarity or TanimotoSimilarity()
[docs] def score(self, queries: Sequence[Molecule], candidates: Sequence[Molecule]) -> list[float]: if not queries: raise ValueError("at least one active training query is required") query_fingerprints = [self.fingerprinter.fingerprint(molecule.smiles) for molecule in queries] scores: list[float] = [] for candidate in candidates: candidate_fingerprint = self.fingerprinter.fingerprint(candidate.smiles) scores.append( max( self.similarity.score(query_fingerprint, candidate_fingerprint) for query_fingerprint in query_fingerprints ) ) return scores