admin管理员组

文章数量:1414628

I'm using RDKit to compare several molecules to a reference molecule, Picéatannol. I defined its structure using its SMILES, but it seems that RDKit does not recognize it properly in my code.

Here is my code:

from rdkit.Chem import Descriptors, rdFingerprintGenerator, DataStructs

# Définir la molécule de référence (Picéatannol)
piceatannol_smiles = "C1=CC(=C(C=C1C=CC2=CC(=CC(=C2)O)O)O)O"
piceatannol_mol = Chem.MolFromSmiles(piceatannol_smiles)

# Générer l'empreinte Man
man_gen = rdFingerprintGenerator.GetManGenerator(radius=2, fpSize=2048)
piceatannol_fp = man_gen.GetFingerprint(piceatannol_mol)

# Définir les sous-structures (diméthoxybenzène et catéchol)
dimethoxybenzene_pattern = Chem.MolFromSmarts("COc1ccccc1OC")
catechol_pattern = Chem.MolFromSmiles("Oc1c(O)cccc1")

# Liste des molécules avec leurs SMILES (complétée autant que possible)
molecules = {
    "Papaverine": "COC1=CC2=C(C=C1OC)C=CC(=C2OC)OCCN(C)C",
    "Verapamil": "CC(C)N(CC(O)COC1=CC=CC=C1)CC2=CC=CC=C2",
    "Vernakalant": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
    "Midodrine": "CC(CN1CCCC1)C2=CC=CC=C2",
    "Cisatracurium": "CC(=O)OCCCN1CCOC2=CC=CC=C12",
    "Remoxipride": "CCN1CCN(CC1)C(=O)C2=CC=C(C=C2)O",
    "Infigratinib": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
    "Meticillin": "COC1=CC=C(C=C1)C2=NC(=O)C(=O)N2C",
    "Ubiquinol": "CCCCCCCCCCCCCCCCCC1=CC(=C(C(=C1O)O)CC2C(C(C(O2)C)O)O)O",
    "Methoxamine": "COC1=CC=C(C=C1)CC(O)CNC",
    "Erdafitinib": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
    "Istradefylline": "CC1=CC=C(C=C1)C2=NC3=C(N2C4=CC=CC=C4)C(=NC=N3)C5=CC=CC=C5",
    "Mebeverine": "CCOC(=O)CCCN1CCOC2=CC=CC=C12",
    "Etoposide": "CC1=C(O2)C(C(=O)C3=CC(=C(C=C3O2)OC)OCCOCCOC)=C(C1)OC",
    "Pinaverium": "CCOC(=O)CN(CCOC1=CC=CC=C1)CC2=CC=CC=C2C",
    "Teniposide": "CC1=CC2=C(C(=C1)O)OC3=C(O2)C=C(C=C3O)CCOCC4C(C(C(O4)SC5=CC=CC=C5)O)O",
    "Atracurium besylate": "CC(=O)OCCCN1CCOC2=CC=CC=C12",
    "Terameprocol": "CC1=CC(=O)C2=C(C(=C1)O)OC3=C(O2)C=C(C=C3)CCOCCOC",
    "Syringate": "COC1=CC=C(C=C1O)O",
    "Gallopamil": "CC(C)N(CC(O)COC1=CC=CC=C1)CC2=CC=CC=C2",
    "Piritrexim": "CC1=CC(=NC(=N1)NC2=CC=CC=C2)C3=CC=C(C=C3)OC",
    "GTS-21": "CC1=CC=CC=C1C(=O)NC2=CC=CC=N2",
    "1-cyclobutyl-3-(3,4-dimethoxyphenyl)-1H-pyrazolo[3,4-d]pyrimidin-4-amine": "CC1=CC=CC=C1", 
    "2,5-Dimethoxy-4-ethylthioamphetamine" : "CC1=CC=CC=C1",
    "(Sri-9662)6-[(1Z)-2-(2,5-dimethoxyphenyl)ethenyl]-5-methylpyrido[2,3-d]pyrimidine-2,4-diamine": "CC1=CC=CC=C1", 
    "4-Bromo-2,5-dimethoxyamphetamine": "CC1=CC=CC=C1", 
    "Itopride": "COC1=CC=C(C=C1)CCNCC(=O)NC2=CC=CC=C2",
    "3-(3,4-dimethoxyphenyl)propanoic acid": "CC1=CC=CC=C1", 
    "8-(2,5-Dimethoxy-Benzyl)-2-Fluoro-9-Pent-9h-Purin-6-Ylamine": "CC1=CC=CC=C1", 
    "9-Butyl-8-(2,5-Dimethoxy-Benzyl)-2-Fluoro-9h-Purin-6-Ylamine": "CC1=CC=CC=C1", 
    "AZD-6280": "CC1=CC=CC=C1", 
    "Etripamil": "CC1=CC=CC=C1", 
    "N-(4-AMINO-5-CYANO-6-ETHOXYPYRIDIN-2-YL)-2-(4-BROMO-2,5-DIMETHOXYPHENYL)ACETAMIDE": "CC1=CC=CC=C1",
    "N-[1-(2,6-dimethoxybenzyl)piperidin-4-yl]-4-sulfanylbutanamide" : "CC1=CC=CC=C1", 
    "Arverapamil": "CC(C)N(CC1=CC=CC=C1)CC2=CC=CC=C2", 
    "2,5-Dimethoxy-4-ethylamphetamine": "CC1=CC=CC=C1", 
    "2,5-Dimethoxy-4-(n)-propylthiophenethylamine": "CC1=CC=CC=C1", 
    "{3-[3-(3,4-Dimethoxy-Phenyl)-1-(1-{1-[2-(3,4,5-Trimethoxy-Phenyl)-Butyryl]-Piperidin-2yl}-Vinyloxy)-Propyl]-Phenoxy}-Acetic Acid": "CC1=CC=CC=C1", 
    "N-[1-(5-bromo-2,3-dimethoxybenzyl)piperidin-4-yl]-4-sulfanylbutanamide": "CC1=CC=CC=C1",
    "5-[3-(2,5-dimethoxyphenyl)prop-1-yn-1-yl]-6-ethylpyrimidine-2,4-diamine": "CC1=CC=CC=C1", 
    "2,5-Dimethoxyamphetamine": "CC1=CC=CC=C1", 
    "4-Methyl-2,5-dimethoxyamphetamine": "CC1=CC=CC=C1",
    "4-Bromo-2,5-dimethoxyphenethylamine": "CC1=CC=CC=C1", 
    "Dimetofrine": "COC1=CC=CC(=C1)CC(O)CNC",
    "Dextofisopam": "CN1CCN(CC1)C(=O)C2=CC=CC=C2",
    "Tofisopam": "CN1CCN(CC1)C(=O)C2=CC=CC=C2",
    "Tranilast": "CC1=CC=CC=C1C(=O)NCC(=O)O",
    "Bevantolol": "CC(CN1CCCC1)C2=CC=CC=C2",
    "Veralipride": "CCN1CCN(CC1)C(=O)C2=CC=CC=C2",
    "Meclinertant": "CC1=CC=CC=C1CCNCC(=O)NC2=CC=CC=C2",
    "Picéatannol": "OC1=CC=C(C=C1)C=C2C=CC(O)=C(O)C2",
    "Firategrast": "CC1=NC(=O)C(=NC1=O)C2=CC=CC=C2",
    "Trimethoprim": "CC1=NC(C=C(N1)OC2=CC=CC=C2O)=CC3=CC=C(C=C3)O"
 
  

    
    # Ajoutez d'autres molécules ici avec leurs SMILES correspondants
}

# Stocker les résultats
results = []

for name, smiles in molecules.items():
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"❌ Erreur avec la molécule : {name}")
        continue

    # Vérifier la présence de diméthoxybenzène ou catéchol
    has_dimethoxybenzene = mol.HasSubstructMatch(dimethoxybenzene_pattern)
    has_catechol = mol.HasSubstructMatch(catechol_pattern)

    if not (has_dimethoxybenzene or has_catechol):
        continue  # Passe à la molécule suivante si elle ne contient ni l'un ni l'autre

    # Calcul de la similarité Tanimoto
    mol_fp = man_gen.GetFingerprint(mol)
    similarity = DataStructs.TanimotoSimilarity(piceatannol_fp, mol_fp)

    # Masse moléculaire
    mass = Descriptors.MolWt(mol)

    # Nombre de donneurs de liaison hydrogène
    h_donors = Descriptors.NumHDonors(mol)

    # Ajout aux résultats
    results.append((name, similarity, mass, h_donors))

# Trier par ordre décroissant de similarité
results.sort(key=lambda x: x[1], reverse=True)

# Affichage des résultats
print("

本文标签: pythonIssue with RDKit Pic233atannol Not Recognized in My CodeStack Overflow