|
| 1 | +import os |
| 2 | + |
| 3 | +import dotenv |
| 4 | +dotenv.load_dotenv() |
| 5 | +import litellm |
| 6 | +litellm.drop_params = True |
| 7 | +import streamlit as st |
| 8 | +from typing import Optional, Any |
| 9 | +from pytidb import TiDBClient |
| 10 | +from pytidb.schema import TableModel, Field |
| 11 | +from pytidb.embeddings import EmbeddingFunction |
| 12 | + |
| 13 | + |
# Connection settings are read from the environment (dotenv has already been
# loaded at import time); each one falls back to a local default.
connection_settings = {
    "host": os.getenv("TIDB_HOST", "localhost"),
    "port": int(os.getenv("TIDB_PORT", "4000")),
    "username": os.getenv("TIDB_USERNAME", "root"),
    "password": os.getenv("TIDB_PASSWORD", ""),
    "database": os.getenv("TIDB_DATABASE", "test"),
}
db = TiDBClient.connect(**connection_settings)
# Alternative: connect with a single URL instead of separate settings:
#   database_url = "mysql://username:password@host:port/database"
#   db = TiDBClient.connect(database_url)

# Embedding function used to declare the vector field on the table model.
text_embed = EmbeddingFunction("ollama/mxbai-embed-large")
| 25 | + |
| 26 | + |
class Chunk(TableModel, table=True):
    """Table model for the ``chunks`` table: an id, a text and its vector."""

    __tablename__ = "chunks"
    # NOTE(review): presumably needed because Streamlit re-executes the script
    # and the model gets declared again -- confirm.
    __table_args__ = dict(extend_existing=True)

    id: int = Field(primary_key=True)
    text: str = Field()
    # Vector field declared through the embedding function, sourced from `text`.
    text_vec: Optional[Any] = text_embed.VectorField(source_field="text")
| 36 | + |
# Demo corpus: short, unrelated sentences used to seed the table when it is
# empty. Order is preserved so that inserts happen in the same sequence.
sample_chunks = [
    "Llamas are camelids known for their soft fur and use as pack animals.",
    "Python's GIL ensures only one thread executes bytecode at a time.",
    "TiDB is a distributed SQL database with HTAP capabilities.",
    "Einstein's theory of relativity revolutionized modern physics.",
    "The Great Wall of China stretches over 13,000 miles.",
    "Ollama enables local deployment of large language models.",
    "HTTP/3 uses QUIC protocol for improved web performance.",
    "Kubernetes orchestrates containerized applications across clusters.",
    "Blockchain technology enables decentralized transaction systems.",
    "GPT-4 demonstrates remarkable few-shot learning capabilities.",
    "Machine learning algorithms improve with more training data.",
    "Quantum computing uses qubits instead of traditional bits.",
    "Neural networks are inspired by the human brain's structure.",
    "Docker containers package applications with their dependencies.",
    "Cloud computing provides on-demand computing resources.",
    "Artificial intelligence aims to mimic human cognitive functions.",
    "Cybersecurity protects systems from digital attacks.",
    "Big data analytics extracts insights from large datasets.",
    "Internet of Things connects everyday objects to the internet.",
    "Augmented reality overlays digital content on the real world.",
]
| 59 | + |
# Create the table from the Chunk schema, then seed it with the sample
# sentences only when it currently holds no rows.
table = db.create_table(schema=Chunk)
if not table.rows():
    table.bulk_insert([Chunk(text=sentence) for sentence in sample_chunks])
| 64 | + |
# --- Streamlit UI -----------------------------------------------------------
# Page header, a sidebar slider for the maximum number of hits, and a
# free-text query box.
st.title("🔍 Search Demo")
st.write("Input search query, find similar chunks")
query_limit = st.sidebar.slider("query limit", min_value=1, max_value=20, value=10)
query = st.text_input("Search:", "")

# Run the search only when the button was pressed AND the query is non-empty.
if st.button("Search") and query:
    with st.spinner("Searching for similar chunks..."):
        # Materialise the results here. The object returned by
        # `table.search(...).limit(...)` is a query object, not the fetched
        # rows, so testing its truthiness says nothing about whether any row
        # matched. Converting once also keeps the spinner visible while the
        # query actually runs.
        results_df = table.search(query).limit(query_limit).to_pandas()

    if results_df.empty:
        st.info("No relevant results found")
    else:
        st.write("### Search results:")
        st.dataframe(results_df)
0 commit comments