Skip to content
Setting up Supabase

Setting up Supabase

We need to create tables, functions and indexes to make Embedbase work with Supabase.

  1. Go to SQL Editor
  1. Run the following SQL commands
-- This is a postgres extension that allows us to store vectors and
-- perform similarity search on them
create extension vector
with
  schema extensions;
 
-- This is the main table that stores documents and embeddings
create table documents (
    id text primary key,
    data text,
    embedding vector (1536),
    hash text,
    dataset_id text,
    user_id text,
    metadata json,
    created_date TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
 
-- Make sure to prevent clients from accessing this table directly
alter table documents
  enable row level security;
 
-- We are creating a function to run a similarity search on the documents table
create or replace function match_documents (
  query_embedding vector(1536),
  similarity_threshold float,
  match_count int,
  query_dataset_ids text[],
  query_user_id text default null
)
returns table (
  id text,
  data text,
  score float,
  hash text,
  embedding vector(1536),
  metadata json
)
language plpgsql
as $$
begin
  return query
  select
    documents.id,
    documents.data,
    (1 - (documents.embedding <=> query_embedding)) as similarity,
    documents.hash,
    documents.embedding,
    documents.metadata
  from documents
  where 1 - (documents.embedding <=> query_embedding) > similarity_threshold
    and documents.dataset_id = any(query_dataset_ids)
    and (query_user_id is null or query_user_id = documents.user_id)
  order by documents.embedding <=> query_embedding
  limit match_count;
end;
$$;
 
-- This index will allow us to perform similarity search on the documents table
create index on documents
using ivfflat (embedding vector_cosine_ops)
with (lists = 100);
 
-- This is a view that will allow us to get an overview of the datasets
CREATE OR REPLACE VIEW distinct_datasets AS
SELECT dataset_id, user_id, COUNT(*) AS documents_count
FROM documents
GROUP BY dataset_id, user_id;