admin管理员组

文章数量:1415673

How can I use RedisSearch to match a more complex wildcard pattern such as the following?

import uuid

from pydantic import BaseModel
from redis.asyncio import Redis
from redismands.search.field import TagField, TextField
from redismands.search.indexDefinition import IndexDefinition, IndexType
from redismands.search.query import Query
from rich import print as pprint

from app.config.config import load_config


class Indexation(BaseModel):
    prefix: str
    document_id: str
    chunk_ids: list[str]


class IndexationDAO:
    def __init__(self, redis_client: Redis):
        self.redis_client = redis_client
        self.key_prefix = "indexation:"

    async def create_indexes(self):
        await self.redis_client.ft("indexation-idx").create_index(
            fields=[
                TextField(
                    name="$.prefix",
                    no_stem=False,
                    # withsuffixtrie=True,  # ?
                    as_name="prefix",
                ),
                TagField(name="$.document_id", as_name="document_id"),
            ],
            definition=IndexDefinition(prefix=self.key_prefix, index_type=IndexType.JSON),
        )

    async def add_indexation(self, indexation: Indexation):
        key = f"{self.key_prefix}{uuid.uuid4()}"
        __added: bool = await self.redis_client.json().set(key, "$", indexation.model_dump())  # type: ignore
        return key

    async def get_indexations(self, document_id: str):
        query = Query(f'@document_id:"{{{document_id}}}"')
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs

    async def search_indexations(self, prefix: str):
        # query = Query(f"@prefix:{prefix}").dialect(2)
        query = Query(f"@prefix:{prefix}")
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs


async def test_indexation_dao():
    config = await load_config()

    async with Redis(host=config.redis_host, port=config.redis_port) as redis_client:
        await redis_client.flushall()

        indexation_dao = IndexationDAO(redis_client=redis_client)
        await indexation_dao.create_indexes()

        await indexation_dao.add_indexation(
            Indexation(prefix="folder/animals", document_id="fileA", chunk_ids=["chunk0", "chunk1"]),
        )
        await indexation_dao.add_indexation(
            Indexation(prefix="animals/folder", document_id="fileB", chunk_ids=["chunk2", "chunk3"]),
        )

        # indexations = await indexation_dao.search_indexations(prefix="fo*nimals")
        indexations = await indexation_dao.search_indexations(prefix="fo*/*nimals")
        pprint(indexations)

The result is 2 documents instead of 1.

tests/integration/vector_stores/test_indexation_dao.py Result{2 total, docs: [Document {'id': 
'indexation:403768c6-0a44-4cec-a354-417a559fda5a', 'payload': None, 'json': 
'{"prefix":"folder/animals","document_id":"fileA","chunk_ids":["chunk0","chunk1"
]}'}, Document {'id': 'indexation:2fc81fb3-6937-4c11-9f9a-c3c4045c4f4b', 
'payload': None, 'json': 
'{"prefix":"animals/folder","document_id":"fileB","chunk_ids":["chunk2","chunk3"
]}'}]}

The goal is to group objects without changing the keys, since prefix matching on keys requires a SCAN. However, I am confused by how TEXT indexes handle order, infix wildcards, the level of "depth" (folder/subfolder vs folder/project/subfolder) and partial matches (without using the full "subfolder" token and just "*der").

本文标签: redisRediSearch for matching file path regex patternStack Overflow