Edit a Delta table
Use this recipe to read, edit, and write back data in a Unity Catalog table using the Databricks SQL Connector.
Code snippet
app.py
from functools import lru_cache
from databricks import sql
from databricks.sdk.core import Config
import pandas as pd
cfg = Config()
@lru_cache(maxsize=1)
def get_connection(http_path):
return sql.connect(
server_hostname=cfg.host,
http_path=http_path,
credentials_provider=lambda: cfg.authenticate,
)
def read_table(table_name: str, conn) -> pd.DataFrame:
with conn.cursor() as cursor:
cursor.execute(f"SELECT * FROM {table_name}")
return cursor.fetchall_arrow().to_pandas()
def insert_overwrite_table(table_name: str, df: pd.DataFrame, conn):
with conn.cursor() as cursor:
rows = list(df.itertuples(index=False, name=None))
if not rows:
return
cols = list(df.columns)
num_cols = len(cols)
params = {}
values_sql_parts = []
p = 0
for row in rows:
ph = []
for v in row:
key = f"p{p}"
ph.append(f":{key}")
params[key] = v
p += 1
values_sql_parts.append("(" + ",".join(ph) + ")")
values_sql = ",".join(values_sql_parts)
col_list_sql = ",".join(cols)
cursor.execute(f"INSERT OVERWRITE {table_name} ({col_list_sql}) VALUES {values_sql}", params)
http_path_input = "/sql/1.0/warehouses/xxxxxx"
table_name = "catalog.schema.table"
conn = get_connection(http_path_input)
df = read_table(table_name, conn)
# Edit the dataframe
insert_overwrite_table(table_name, df, conn)
info
This sample uses Pythons's lru_cache. Adapt the caching behavior to fit your specific use case.
Resources
Permissions
Your app service principal needs the following permissions:
MODIFYon the Unity Catalog tableCAN USEon the SQL warehouse
See Unity Catalog privileges and securable objects for more information.
Dependencies
- Databricks SDK -
databricks-sdk - Databricks SQL Connector -
databricks-sql-connector - Pandas -
pandas - Dash -
dash
requirements.txt
databricks-sdk
databricks-sql-connector
pandas
dash