PostgreSQL Full-Text Search
tsvector, tsquery, GIN index, ranking, Chinese text search, pg_trgm similarity.
postgresqlsearchdatabasebackend
# PostgreSQL Full-Text Search
## Core types
- `tsvector`: preprocessed document (stemmed, normalized tokens + positions)
- `tsquery`: search query with operators
## Basic search
```sql
-- Convert text to tsvector
SELECT to_tsvector('english', 'The quick brown fox jumps');
-- 'brown':3 'fox':4 'jump':5 'quick':2
-- Convert query to tsquery
SELECT to_tsquery('english', 'quick & fox');
SELECT plainto_tsquery('english', 'quick brown fox'); -- auto & between words
SELECT phraseto_tsquery('english', 'quick brown'); -- must be adjacent
-- Match
SELECT to_tsvector('english', 'quick fox') @@ to_tsquery('english', 'fox');
-- true
```
## Table setup with search column
```sql
ALTER TABLE articles ADD COLUMN search_vec tsvector;
-- Populate
UPDATE articles SET search_vec =
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(content, '')), 'B');
-- A = highest weight (title), B = lower (content)
-- GIN index for fast search
CREATE INDEX articles_search_idx ON articles USING GIN(search_vec);
-- Auto-update with trigger
CREATE FUNCTION update_search_vec() RETURNS trigger AS $$
BEGIN
NEW.search_vec :=
setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(NEW.content, '')), 'B');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_search BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION update_search_vec();
```
## Search query with ranking
```sql
SELECT
id,
title,
ts_rank(search_vec, query) AS rank,
ts_headline('english', content, query,
'MaxFragments=2, MaxWords=20, MinWords=10') AS snippet
FROM articles,
plainto_tsquery('english', 'machine learning') AS query
WHERE search_vec @@ query
ORDER BY rank DESC
LIMIT 10;
```
## Multi-language (Chinese with zhparser)
```sql
-- Install zhparser extension for Chinese
CREATE EXTENSION zhparser;
CREATE TEXT SEARCH CONFIGURATION chinese (PARSER = zhparser);
ALTER TEXT SEARCH CONFIGURATION chinese ADD MAPPING FOR n,v,a WITH simple;
SELECT to_tsvector('chinese', '自然语言处理技术');
SELECT to_tsquery('chinese', '自然语言 & 处理');
```
## pg_trgm (fuzzy / LIKE search)
```sql
CREATE EXTENSION pg_trgm;
-- GIN index for LIKE queries
CREATE INDEX articles_trgm_idx ON articles USING GIN(title gin_trgm_ops);
-- Similarity search
SELECT title, similarity(title, 'javascrpit') AS sim
FROM articles
WHERE title % 'javascrpit' -- typo tolerance
ORDER BY sim DESC;
-- LIKE now uses the index
SELECT * FROM articles WHERE title ILIKE '%javascript%';
```
## Combined: full-text + vector (hybrid search)
```sql
-- Add pgvector
CREATE EXTENSION vector;
ALTER TABLE articles ADD COLUMN embedding vector(1536);
CREATE INDEX ON articles USING hnsw(embedding vector_cosine_ops);
-- Hybrid: keyword score + semantic score
WITH semantic AS (
SELECT id, 1 - (embedding <=> $1) AS vscore FROM articles ORDER BY embedding <=> $1 LIMIT 20
),
keyword AS (
SELECT id, ts_rank(search_vec, query) AS kscore FROM articles,
plainto_tsquery('english', $2) AS query WHERE search_vec @@ query
)
SELECT a.id, a.title,
coalesce(s.vscore, 0) * 0.7 + coalesce(k.kscore, 0) * 0.3 AS score
FROM articles a
LEFT JOIN semantic s ON a.id = s.id
LEFT JOIN keyword k ON a.id = k.id
WHERE s.id IS NOT NULL OR k.id IS NOT NULL
ORDER BY score DESC LIMIT 10;
```API: /api/skills/postgresql-fulltext