Skip to main content

Run vector search

This recipe performs a vector search query on a Mosaic AI Vector Search index using the Databricks SDK for Python.

Code snippet

app.py
import streamlit as st
from databricks.sdk import WorkspaceClient

# Databricks workspace client, constructed with no explicit arguments.
# NOTE(review): presumably resolves credentials from the app's environment — confirm.
w = WorkspaceClient()

# Client obtained from the workspace's serving-endpoints API; the code below
# uses its `embeddings.create` call to turn query text into a vector.
# NOTE(review): this helper presumably needs the OpenAI client package to be
# installed alongside the SDK — confirm against the project's requirements.
openai_client = w.serving_endpoints.get_open_ai_client()

# Serving endpoint name passed as `model` when requesting embeddings.
EMBEDDING_MODEL_ENDPOINT_NAME = "databricks-gte-large-en"


def get_embeddings(text):
    """Return the embedding of *text*, or ``None`` if it could not be generated.

    The request is sent through ``openai_client`` to the endpoint named by
    ``EMBEDDING_MODEL_ENDPOINT_NAME`` and the embedding of the first item in
    the response is returned. Any exception raised while requesting or
    unpacking the response is reported in the Streamlit page via ``st.text``
    and the function then returns ``None``.
    """
    try:
        embedding_response = openai_client.embeddings.create(
            input=text,
            model=EMBEDDING_MODEL_ENDPOINT_NAME,
        )
        first_item = embedding_response.data[0]
        return first_item.embedding
    except Exception as err:
        # Best-effort: surface the failure in the UI instead of raising.
        st.text(f"Error generating embeddings: {err}")
    return None


def run_vector_search(prompt: str, num_results: int = 3) -> "list | str | None":
    """Query the vector search index with the embedding of *prompt*.

    Reads the module-level ``index_name`` and ``columns`` values (the
    Streamlit text inputs) at call time.

    Args:
        prompt: Query text to embed and search for.
        num_results: Number of results requested from the index
            (defaults to 3, the previously hard-coded value).

    Returns:
        On success, ``query_result.result.data_array`` as returned by the
        SDK. On any failure, a human-readable error message string; callers
        can tell the two apart with ``isinstance(result, str)``.
    """
    # Validate the cheap, local inputs first so that an obviously incomplete
    # form does not cost a call to the embedding endpoint.
    target_index = (index_name or "").strip()
    if not target_index:
        return "Error during vector search: no index name was provided"

    columns_to_fetch = [
        col.strip() for col in (columns or "").split(",") if col.strip()
    ]
    if not columns_to_fetch:
        return "Error during vector search: no columns to retrieve were provided"

    prompt_vector = get_embeddings(prompt)
    if prompt_vector is None:
        # get_embeddings returns None on failure; avoid a message that
        # ends in the literal text "None".
        return "Failed to generate embeddings: no embedding was returned"
    if isinstance(prompt_vector, str):
        return f"Failed to generate embeddings: {prompt_vector}"

    try:
        query_result = w.vector_search_indexes.query_index(
            index_name=target_index,
            columns=columns_to_fetch,
            query_vector=prompt_vector,
            num_results=num_results,
        )
        return query_result.result.data_array
    except Exception as e:
        # Errors are reported as strings rather than raised so the UI can
        # display them directly.
        return f"Error during vector search: {e}"


# Name of the vector search index to query; read by run_vector_search.
index_name = st.text_input(
    label="Unity Catalog Vector search index:",
    placeholder="catalog.schema.index-name",
)

# Comma-separated list of index columns to return; read by run_vector_search.
columns = st.text_input(
    label="Columns to retrieve (comma-separated):",
    placeholder="url, name",
    help="Enter one or more column names present in the vector search index, separated by commas. E.g. id, text, url.",
)

# Free-text query that is embedded and searched for.
text_input = st.text_input(
    label="Enter your search query:",
    placeholder="What is Databricks?",
    key="search_query_key",
)

if st.button("Run vector search"):
    result = run_vector_search(text_input)
    if isinstance(result, str):
        # run_vector_search reports failures as a message string; show it as
        # an error instead of presenting it under the results heading.
        st.error(result)
    else:
        st.write("Search results:")
        st.write(result)

Resources

Permissions

Your app service principal needs the following permissions:

  • USE CATALOG on the catalog that contains the Vector Search index
  • USE SCHEMA on the schema that contains the Vector Search index
  • SELECT on the Vector Search index

See "Query a vector search endpoint" in the Databricks documentation for more information.

Dependencies

requirements.txt
databricks-sdk
streamlit