Weaviate Vector Database

Weaviate is an open-source, cloud-native vector database designed for storing, searching, and managing data objects alongside their vector embeddings. It supports semantic search, hybrid search (combining vector and keyword search), generative AI workflows, and scales to billions of objects in production.

1. Overview & Key Features

What Makes Weaviate Different

Weaviate vs Other Vector Databases

2. Installation & Setup

Install the Python Client

pip install weaviate-client

Run Weaviate with Docker Compose

Create a docker-compose.yml file:

# docker-compose.yml — single-node Weaviate with the OpenAI modules enabled.
# NOTE: the top-level `version:` key is obsolete in the Compose Specification
# and has been removed; modern `docker compose` ignores it with a warning.
services:
  weaviate:
    image: cr.weaviate.io/semitechnologies/weaviate:1.27.0
    restart: on-failure
    ports:
      - "8080:8080"    # REST API
      - "50051:50051"  # gRPC (used by the v4 Python client)
    environment:
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'  # dev only — enable auth in production
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      DEFAULT_VECTORIZER_MODULE: 'text2vec-openai'
      ENABLE_MODULES: 'text2vec-openai,generative-openai'
      OPENAI_APIKEY: '${OPENAI_APIKEY}'  # read from the host environment
      CLUSTER_HOSTNAME: 'node1'
    volumes:
      - weaviate_data:/var/lib/weaviate  # persist data across container restarts

volumes:
  weaviate_data:
# Start Weaviate in the background (detached mode)
docker compose up -d

# Verify it is running — the readiness endpoint returns HTTP 200 once the node is up
curl http://localhost:8080/v1/.well-known/ready

Connect from Python

import weaviate
from weaviate.classes.init import Auth

# --- Option 1: Local instance ---
# Connects to localhost:8080 (REST) and localhost:50051 (gRPC) by default.
client = weaviate.connect_to_local()

# --- Option 2: Weaviate Cloud ---
# NOTE(review): in a real script use only ONE option — this assignment
# replaces the local client above without closing it.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url="https://your-cluster.weaviate.network",
    auth_credentials=Auth.api_key("your-weaviate-api-key"),
    headers={"X-OpenAI-Api-Key": "your-openai-key"}  # forwarded to the OpenAI modules
)

print(client.is_ready())  # True

3. Schema Design & Collections

Weaviate organizes data into collections (formerly called "classes"). Each collection has a name, properties (fields), and a vectorizer configuration.

Create a Collection

import weaviate
import weaviate.classes.config as wc

client = weaviate.connect_to_local()

# Create a collection with explicit property definitions
client.collections.create(
    name="Article",
    description="News articles with semantic search",
    # Embeddings are generated server-side by the text2vec-openai module.
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(
        model="text-embedding-3-small"
    ),
    # LLM used for generative (RAG) queries on this collection.
    generative_config=wc.Configure.Generative.openai(
        model="gpt-4o-mini"
    ),
    properties=[
        wc.Property(name="title",       data_type=wc.DataType.TEXT),
        wc.Property(name="content",     data_type=wc.DataType.TEXT),
        # skip_vectorization: keep the field filterable but exclude it from
        # the text that gets embedded.
        wc.Property(name="category",    data_type=wc.DataType.TEXT,
                    skip_vectorization=True),
        wc.Property(name="published",   data_type=wc.DataType.DATE),
        wc.Property(name="word_count",  data_type=wc.DataType.INT),
        wc.Property(name="is_premium",  data_type=wc.DataType.BOOL),
    ]
)

print("Collection 'Article' created.")

Supported Data Types

List and Inspect Collections

# Enumerate every collection defined on this Weaviate instance.
all_collections = client.collections.list_all()
for collection_name in all_collections:
    print(collection_name)

# Pull the full server-side configuration of one collection and inspect it.
article_collection = client.collections.get("Article")
article_config = article_collection.config.get()
print(f"Vectorizer: {article_config.vectorizer}")
property_names = [prop.name for prop in article_config.properties]
print(f"Properties: {property_names}")

4. CRUD Operations

Insert Objects

from datetime import datetime

articles = client.collections.get("Article")

# --- Insert a single object ---
# insert() returns the new object's UUID.
# NOTE(review): the variable name `uuid` shadows the stdlib `uuid` module;
# harmless in a snippet, but prefer e.g. `obj_uuid` in real code.
uuid = articles.data.insert(
    properties={
        "title":      "Introduction to Vector Databases",
        "content":    "Vector databases store data as high-dimensional vectors, "
                      "enabling semantic similarity search across millions of records.",
        "category":   "Technology",
        "published":  datetime(2024, 6, 15),
        "word_count": 1200,
        "is_premium": False,
    }
)
print(f"Inserted with UUID: {uuid}")

# --- Batch insert multiple objects ---
# Define the payload first, then stream it through a dynamic batch, which
# tunes request size and concurrency automatically.
data = [
    {
        "title":     "Understanding HNSW Indexing",
        "content":   "Hierarchical Navigable Small World graphs provide "
                     "logarithmic search complexity for nearest neighbor queries.",
        "category":  "Engineering",
        "published": datetime(2024, 7, 20),
        "word_count": 2500,
        "is_premium": True,
    },
    {
        "title":     "RAG Pipeline Best Practices",
        "content":   "Retrieval-Augmented Generation combines vector search "
                     "with large language models to produce grounded answers.",
        "category":  "AI",
        "published": datetime(2024, 8, 5),
        "word_count": 1800,
        "is_premium": False,
    },
    {
        "title":     "Scaling Vector Search to Billions",
        "content":   "Horizontal sharding and product quantization enable "
                     "vector databases to handle billions of embeddings.",
        "category":  "Engineering",
        "published": datetime(2024, 9, 10),
        "word_count": 3200,
        "is_premium": True,
    },
]

with articles.batch.dynamic() as batch:
    for item in data:
        batch.add_object(properties=item)

# add_object() does not raise on per-object server errors, so always check
# for failures after the batch context exits — otherwise data loss is silent.
if articles.batch.failed_objects:
    for failed in articles.batch.failed_objects:
        print(f"  Failed: {failed.message}")

print(f"Batch inserted {len(data)} objects.")

Read Objects

articles = client.collections.get("Article")

# Fetch a specific object by UUID
obj = articles.query.fetch_object_by_id(uuid)
print(f"Title: {obj.properties['title']}")
print(f"UUID:  {obj.uuid}")

# Fetch multiple objects with a limit
result = articles.query.fetch_objects(limit=10)
for o in result.objects:
    print(f"  {o.properties['title']} ({o.properties['category']})")

Update Objects

articles = client.collections.get("Article")

# Update specific properties (partial update)
articles.data.update(
    uuid=uuid,
    properties={
        "word_count": 1350,
        "is_premium": True,
    }
)
print("Object updated.")

# Replace all properties (full replace — omitted fields become null)
articles.data.replace(
    uuid=uuid,
    properties={
        "title":      "Introduction to Vector Databases (Revised)",
        "content":    "Updated and expanded guide to vector databases.",
        "category":   "Technology",
        "published":  datetime(2024, 10, 1),
        "word_count": 2000,
        "is_premium": True,
    }
)

Delete Objects

articles = client.collections.get("Article")

# Delete by UUID
articles.data.delete_by_id(uuid)

# Delete by filter (bulk delete)
from weaviate.classes.query import Filter

articles.data.delete_many(
    where=Filter.by_property("category").equal("Archived")
)

# Delete entire collection
client.collections.delete("Article")

5. Vector Search (Semantic Search)

Vector search finds objects whose embeddings are closest to a query vector. Weaviate exposes this as near_text (which vectorizes your query automatically) or near_vector (where you supply the raw vector yourself).

Near Text Search

from weaviate.classes.query import MetadataQuery

articles = client.collections.get("Article")

# Search by natural language query — Weaviate vectorizes it automatically
# using the collection's configured vectorizer.
response = articles.query.near_text(
    query="How do vector databases work internally?",
    limit=5,
    # distance: lower = closer. NOTE(review): certainty is only defined for
    # cosine distance — confirm the collection's metric before relying on it.
    return_metadata=MetadataQuery(distance=True, certainty=True)
)

for obj in response.objects:
    print(f"[{obj.metadata.distance:.4f}] {obj.properties['title']}")
    print(f"  {obj.properties['content'][:100]}...")
    print()

Near Vector Search

import openai

# Generate your own embedding
oai = openai.OpenAI()
embedding = oai.embeddings.create(
    input="scalable search architecture",
    model="text-embedding-3-small"
).data[0].embedding

articles = client.collections.get("Article")

response = articles.query.near_vector(
    near_vector=embedding,
    limit=3,
    return_metadata=MetadataQuery(distance=True)
)

for obj in response.objects:
    print(f"[{obj.metadata.distance:.4f}] {obj.properties['title']}")

Distance Metrics

6. Hybrid Search

Hybrid search combines BM25 keyword matching with vector similarity and fuses the results. The alpha parameter controls the balance: alpha=0 is pure keyword, alpha=1 is pure vector, and alpha=0.5 is an equal mix.

Basic Hybrid Query

from weaviate.classes.query import MetadataQuery

articles = client.collections.get("Article")

# Fuse BM25 keyword matching and vector similarity in a single query.
response = articles.query.hybrid(
    query="HNSW graph indexing for nearest neighbor",
    alpha=0.75,   # 75% vector, 25% keyword
    limit=5,
    # explain_score returns a breakdown of how the fused score was computed
    return_metadata=MetadataQuery(score=True, explain_score=True)
)

for obj in response.objects:
    print(f"[score: {obj.metadata.score:.4f}] {obj.properties['title']}")

Hybrid Search with Filtering

from weaviate.classes.query import Filter

# Re-acquire the collection handle so this snippet stands alone, matching
# the sibling examples (previously it relied on `articles` from an earlier
# snippet).
articles = client.collections.get("Article")

# Hybrid search restricted by a structured filter: only premium articles
# are candidates for scoring.
response = articles.query.hybrid(
    query="production deployment best practices",
    alpha=0.6,
    limit=5,
    filters=Filter.by_property("is_premium").equal(True),
)

for obj in response.objects:
    print(f"  {obj.properties['title']} (premium)")

When to Use Hybrid vs Pure Vector

7. Filtering & Aggregation

Property Filters

from weaviate.classes.query import Filter

articles = client.collections.get("Article")

# Single filter
response = articles.query.fetch_objects(
    filters=Filter.by_property("category").equal("Engineering"),
    limit=10
)

# Compound filters with AND / OR
# `&` combines filters with logical AND: every condition must hold.
response = articles.query.fetch_objects(
    filters=(
        Filter.by_property("category").equal("AI") &
        Filter.by_property("word_count").greater_than(1000) &
        Filter.by_property("is_premium").equal(False)
    ),
    limit=10
)

# OR filter — `|` combines filters with logical OR.
# NOTE: each query above overwrites `response`; only the OR results print below.
response = articles.query.fetch_objects(
    filters=(
        Filter.by_property("category").equal("AI") |
        Filter.by_property("category").equal("Engineering")
    ),
    limit=10
)

for obj in response.objects:
    print(f"  {obj.properties['title']} — {obj.properties['category']}")

Filter Operators

Aggregations

articles = client.collections.get("Article")

# Count objects matching a filter
result = articles.aggregate.over_all(total_count=True)
print(f"Total articles: {result.total_count}")

# Aggregate with filters
from weaviate.classes.aggregate import Metrics

result = articles.aggregate.over_all(
    filters=Filter.by_property("category").equal("Engineering"),
    return_metrics=Metrics("word_count").integer(
        count=True, sum_=True, mean=True, maximum=True, minimum=True
    ),
    total_count=True
)
print(f"Engineering articles: {result.total_count}")
print(f"Avg word count: {result.properties['word_count'].mean}")
print(f"Total words: {result.properties['word_count'].sum_}")

8. Generative Search (RAG)

Weaviate's generative module sends retrieved objects to an LLM to produce grounded answers — a built-in RAG pipeline with no external orchestration needed.

Single-Prompt Generation

from weaviate.classes.query import MetadataQuery

articles = client.collections.get("Article")

# Retrieve + generate per object: single_prompt runs once per retrieved
# object, with {title} and {content} interpolated from that object's
# properties.
response = articles.generate.near_text(
    query="vector database indexing",
    limit=3,
    single_prompt=(
        "Summarize this article in two sentences: "
        "{title} — {content}"
    ),
    return_metadata=MetadataQuery(distance=True)
)

for obj in response.objects:
    print(f"Title: {obj.properties['title']}")
    print(f"Summary: {obj.generated}")  # per-object LLM output
    print()

Grouped Generation

# Retrieve multiple objects, then generate ONE answer from all of them —
# the grouped_task prompt receives every retrieved object as context.
response = articles.generate.near_text(
    query="How do vector databases scale?",
    limit=5,
    grouped_task=(
        "Using the following articles as context, write a comprehensive "
        "paragraph explaining how vector databases achieve scale. "
        "Cite specific techniques mentioned in the articles."
    )
)

# The grouped answer is on the response object, not individual objects
print("Generated answer:")
print(response.generated)
print(f"\nBased on {len(response.objects)} source articles.")

RAG with Filters

from weaviate.classes.query import Filter

# Combine hybrid retrieval, a structured filter, and grouped generation:
# only premium articles are retrieved and passed to the LLM as context.
response = articles.generate.hybrid(
    query="production deployment",
    alpha=0.7,
    limit=3,
    filters=Filter.by_property("is_premium").equal(True),
    grouped_task="Create a bullet-point checklist for deploying a "
                 "vector database in production based on these articles."
)

print(response.generated)

9. Multi-Tenancy

Multi-tenancy isolates data per tenant within a single collection. Each tenant gets its own vector index partition, so tenants cannot see each other's data and inactive tenants can be offloaded to cold storage.

Enable Multi-Tenancy

import weaviate.classes.config as wc

client.collections.create(
    name="UserDocument",
    # Tenant isolation: each tenant's data lives in its own partition.
    multi_tenancy_config=wc.Configure.multi_tenancy(
        enabled=True,
        auto_tenant_creation=True,   # auto-create tenants on insert
        auto_tenant_activation=True, # auto-activate on access
    ),
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
    properties=[
        wc.Property(name="title",   data_type=wc.DataType.TEXT),
        wc.Property(name="content", data_type=wc.DataType.TEXT),
    ]
)

Manage Tenants

from weaviate.classes.tenants import Tenant, TenantActivityStatus

collection = client.collections.get("UserDocument")

# Add tenants explicitly (an alternative to auto_tenant_creation)
collection.tenants.create([
    Tenant(name="tenant_A"),
    Tenant(name="tenant_B"),
    Tenant(name="tenant_C"),
])

# Deactivate a tenant (offload to cold storage) — its data is kept but is
# not queryable until the tenant is activated again.
collection.tenants.update([
    Tenant(name="tenant_C", activity_status=TenantActivityStatus.INACTIVE)
])

# List all tenants with their activity status
tenants = collection.tenants.get()
for name, tenant in tenants.items():
    print(f"  {name}: {tenant.activity_status}")

Tenant-Scoped Operations

# Get a tenant-scoped collection handle — every operation on this handle is
# confined to tenant_A's partition.
tenant_a = client.collections.get("UserDocument").with_tenant("tenant_A")

# Insert data — only visible to tenant_A
tenant_a.data.insert(properties={
    "title":   "Tenant A's Private Document",
    "content": "This data is isolated to tenant A only."
})

# Search within tenant_A's data only
response = tenant_a.query.near_text(
    query="private document",
    limit=5
)
for obj in response.objects:
    print(f"  {obj.properties['title']}")

10. Production Deployment & Best Practices

Performance Tuning

Vector Compression Example

import weaviate.classes.config as wc

client.collections.create(
    name="LargeScaleArticle",
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
    vector_index_config=wc.Configure.VectorIndex.hnsw(
        distance_metric=wc.VectorDistances.COSINE,
        # ef: search-time candidate list size — higher = better recall,
        # slower queries.
        ef=200,
        # ef_construction: build-time candidate list size — higher = better
        # graph quality, slower imports.
        ef_construction=256,
        # max_connections: maximum edges per node in the HNSW graph.
        max_connections=32,
        # Product quantization compresses stored vectors to reduce memory.
        quantizer=wc.Configure.VectorIndex.Quantizer.pq(
            # segments: sub-vectors per embedding — must divide the vector
            # dimensionality evenly.
            segments=128,
            # training_limit: objects used to train the PQ codebook.
            training_limit=100000,
        )
    ),
    properties=[
        wc.Property(name="title",   data_type=wc.DataType.TEXT),
        wc.Property(name="content", data_type=wc.DataType.TEXT),
    ]
)

Replication & Sharding

import weaviate.classes.config as wc

client.collections.create(
    name="HighAvailabilityArticle",
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
    # factor=3: each shard is replicated across 3 nodes.
    replication_config=wc.Configure.replication(factor=3),
    sharding_config=wc.Configure.sharding(
        desired_count=3,          # number of shards
        virtual_per_physical=128, # virtual shards per physical
    ),
    properties=[
        wc.Property(name="title",   data_type=wc.DataType.TEXT),
        wc.Property(name="content", data_type=wc.DataType.TEXT),
    ]
)

Authentication & Security

# `import weaviate` was missing — the snippet calls weaviate.connect_* but
# previously imported only Auth, so it did not run standalone.
import weaviate
from weaviate.classes.init import Auth

# API Key authentication
client = weaviate.connect_to_weaviate_cloud(
    cluster_url="https://your-cluster.weaviate.network",
    auth_credentials=Auth.api_key("your-api-key"),
)

# OIDC authentication (e.g., with Azure AD or Okta)
# NOTE(review): in a real script use only ONE of these clients — this
# assignment replaces the first client without closing it.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url="https://your-cluster.weaviate.network",
    auth_credentials=Auth.client_credentials(
        client_id="your-client-id",
        client_secret="your-client-secret",
    )
)

Backup & Restore

# Create a backup to a configured backend (S3, GCS, or filesystem)
# NOTE(review): the matching backup module (e.g. backup-s3) must be enabled
# on the server for this to succeed — confirm ENABLE_MODULES.
result = client.backup.create(
    backup_id="daily-backup-2024-09-15",
    backend="s3",
    include_collections=["Article", "UserDocument"],
    wait_for_completion=True,  # block until the backup finishes
)
print(f"Backup status: {result.status}")

# Restore from backup
result = client.backup.restore(
    backup_id="daily-backup-2024-09-15",
    backend="s3",
    wait_for_completion=True,
)
print(f"Restore status: {result.status}")

Production Checklist

Complete Working Example

A self-contained example that creates a collection, inserts documents, and performs vector, hybrid, and generative searches:

"""End-to-end Weaviate demo.

Creates a collection, batch-inserts sample documents, then demonstrates
vector, hybrid, filtered, and generative (RAG) searches.
"""

import weaviate
import weaviate.classes.config as wc
from weaviate.classes.query import MetadataQuery, Filter
from datetime import datetime

# ── Connect ──────────────────────────────────────────────────
# Use the client as a context manager so the connection is closed even when
# a step raises; the original only called close() on the happy path.
with weaviate.connect_to_local() as client:

    # ── Create collection ────────────────────────────────────
    # Drop any stale copy so the demo is repeatable.
    if client.collections.exists("KnowledgeBase"):
        client.collections.delete("KnowledgeBase")

    client.collections.create(
        name="KnowledgeBase",
        vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(
            model="text-embedding-3-small"
        ),
        generative_config=wc.Configure.Generative.openai(model="gpt-4o-mini"),
        properties=[
            wc.Property(name="title",    data_type=wc.DataType.TEXT),
            wc.Property(name="content",  data_type=wc.DataType.TEXT),
            wc.Property(name="topic",    data_type=wc.DataType.TEXT,
                        skip_vectorization=True),
            wc.Property(name="created",  data_type=wc.DataType.DATE),
        ]
    )

    # ── Batch insert ─────────────────────────────────────────
    kb = client.collections.get("KnowledgeBase")
    docs = [
        {"title": "What is HNSW?",
         "content": "HNSW is a graph-based algorithm for approximate nearest "
                    "neighbor search with logarithmic complexity.",
         "topic": "indexing", "created": datetime(2024, 1, 10)},
        {"title": "Product Quantization Explained",
         "content": "PQ compresses vectors by splitting them into sub-vectors "
                    "and quantizing each independently.",
         "topic": "compression", "created": datetime(2024, 3, 22)},
        {"title": "BM25 Scoring",
         "content": "BM25 is a probabilistic ranking function used in keyword "
                    "search based on term frequency and document length.",
         "topic": "search", "created": datetime(2024, 5, 14)},
        {"title": "Cosine Similarity",
         "content": "Cosine similarity measures the angle between two vectors, "
                    "producing a value from -1 to 1.",
         "topic": "metrics", "created": datetime(2024, 6, 1)},
        {"title": "Hybrid Search Strategies",
         "content": "Hybrid search merges BM25 keyword scores with vector "
                    "similarity scores using reciprocal rank fusion.",
         "topic": "search", "created": datetime(2024, 7, 18)},
    ]

    with kb.batch.dynamic() as batch:
        for doc in docs:
            batch.add_object(properties=doc)
    print(f"Inserted {len(docs)} documents.\n")

    # ── Vector search ────────────────────────────────────────
    print("=== Vector Search ===")
    response = kb.query.near_text(
        query="How does approximate nearest neighbor work?",
        limit=3,
        return_metadata=MetadataQuery(distance=True)
    )
    for obj in response.objects:
        print(f"  [{obj.metadata.distance:.4f}] {obj.properties['title']}")

    # ── Hybrid search ────────────────────────────────────────
    print("\n=== Hybrid Search ===")
    response = kb.query.hybrid(
        query="BM25 keyword scoring",
        alpha=0.5,
        limit=3,
        return_metadata=MetadataQuery(score=True)
    )
    for obj in response.objects:
        print(f"  [{obj.metadata.score:.4f}] {obj.properties['title']}")

    # ── Filtered search ──────────────────────────────────────
    print("\n=== Filtered Search (topic=search) ===")
    response = kb.query.near_text(
        query="ranking and retrieval",
        limit=5,
        filters=Filter.by_property("topic").equal("search"),
        return_metadata=MetadataQuery(distance=True)
    )
    for obj in response.objects:
        print(f"  [{obj.metadata.distance:.4f}] {obj.properties['title']}")

    # ── Generative search (RAG) ──────────────────────────────
    print("\n=== Generative Search (RAG) ===")
    response = kb.generate.near_text(
        query="vector search algorithms",
        limit=3,
        grouped_task="Based on these articles, explain in 3 sentences how "
                     "modern vector databases achieve fast similarity search."
    )
    print(f"  Generated: {response.generated}")

# Connection closed automatically by the `with` block.
print("\nDone.")