Modern Database Design Patterns for Web Applications
Database design is the foundation of any successful web application. Poor database design can lead to performance bottlenecks, data inconsistencies, and maintenance nightmares. This guide covers modern patterns and best practices for designing scalable databases.
Relational Database Design Patterns
1. Normalized Design with Denormalization Strategy
Start with proper normalization, then strategically denormalize for performance:
-- Normalized tables
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email VARCHAR(255) UNIQUE NOT NULL,
username VARCHAR(100) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE user_profiles (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID REFERENCES users(id) ON DELETE CASCADE,
first_name VARCHAR(100),
last_name VARCHAR(100),
bio TEXT,
avatar_url VARCHAR(500),
location VARCHAR(255)
);
CREATE TABLE posts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
author_id UUID REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(255) NOT NULL,
content TEXT,
status VARCHAR(20) DEFAULT 'draft',
published_at TIMESTAMP,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Denormalized for performance (author info cached in posts)
ALTER TABLE posts ADD COLUMN author_username VARCHAR(100);
ALTER TABLE posts ADD COLUMN author_name VARCHAR(200);
-- Trigger to maintain denormalized data
CREATE OR REPLACE FUNCTION update_post_author_info()
RETURNS TRIGGER AS $$
BEGIN
UPDATE posts
SET
author_username = u.username,
author_name = CONCAT(up.first_name, ' ', up.last_name)
FROM users u
LEFT JOIN user_profiles up ON u.id = up.user_id
WHERE posts.author_id = u.id
AND u.id = NEW.id; -- limit the update to the changed user's posts
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER user_update_trigger
AFTER UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_post_author_info();
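Note that author_name also depends on user_profiles, so a matching trigger on that table is needed to keep the cached name fresh when a profile changes. The payoff of the cached columns is on the read path: listing posts no longer requires joining users or user_profiles. A minimal sketch of that read query, assuming a node-postgres Pool (the listPublishedPosts helper is illustrative):
import { Pool } from "pg";

const pool = new Pool(); // connection settings omitted; node-postgres reads PG* env vars by default

// The feed query reads author info straight from the denormalized columns,
// so no join against users or user_profiles is needed on the hot path.
async function listPublishedPosts(limit = 20) {
  const { rows } = await pool.query(
    `SELECT id, title, author_username, author_name, published_at
     FROM posts
     WHERE status = 'published'
     ORDER BY published_at DESC
     LIMIT $1`,
    [limit]
  );
  return rows;
}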
2. Event Sourcing Pattern
Store all changes as a sequence of events:
-- Event store table
CREATE TABLE events (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
aggregate_id UUID NOT NULL,
aggregate_type VARCHAR(100) NOT NULL,
event_type VARCHAR(100) NOT NULL,
event_data JSONB NOT NULL,
event_version INTEGER NOT NULL,
occurred_at TIMESTAMP DEFAULT NOW(),
UNIQUE(aggregate_id, event_version)
);
-- Index for efficient querying
CREATE INDEX idx_events_aggregate ON events(aggregate_id, event_version);
CREATE INDEX idx_events_type ON events(aggregate_type, event_type);
-- Snapshot table for performance
CREATE TABLE snapshots (
aggregate_id UUID PRIMARY KEY,
aggregate_type VARCHAR(100) NOT NULL,
snapshot_data JSONB NOT NULL,
snapshot_version INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT NOW()
);
-- Example: User aggregate events
INSERT INTO events (aggregate_id, aggregate_type, event_type, event_data, event_version)
VALUES
('123e4567-e89b-12d3-a456-426614174000', 'User', 'UserCreated',
'{"email": "john@example.com", "username": "johndoe"}', 1),
('123e4567-e89b-12d3-a456-426614174000', 'User', 'ProfileUpdated',
'{"firstName": "John", "lastName": "Doe"}', 2),
('123e4567-e89b-12d3-a456-426614174000', 'User', 'EmailChanged',
'{"oldEmail": "john@example.com", "newEmail": "john.doe@example.com"}', 3);
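Reading the current state back means loading the most recent snapshot (if any) and replaying the newer events on top of it. A minimal rehydration sketch in TypeScript, assuming a node-postgres Pool; the applyEvent reducer and loadUser helper are hypothetical names, not part of the schema above:
import { Pool } from "pg";

// Hypothetical reducer: folds one event into the aggregate's current state.
function applyEvent(state: any, eventType: string, eventData: any): any {
  switch (eventType) {
    case "UserCreated":
      return { ...state, ...eventData };
    case "ProfileUpdated":
      return { ...state, ...eventData };
    case "EmailChanged":
      return { ...state, email: eventData.newEmail };
    default:
      return state;
  }
}

async function loadUser(pool: Pool, aggregateId: string) {
  // Start from the latest snapshot, if one exists.
  const snap = await pool.query(
    "SELECT snapshot_data, snapshot_version FROM snapshots WHERE aggregate_id = $1",
    [aggregateId]
  );
  let state = snap.rows[0]?.snapshot_data ?? {};
  const fromVersion = snap.rows[0]?.snapshot_version ?? 0;

  // Replay only the events recorded after the snapshot, in order.
  const events = await pool.query(
    `SELECT event_type, event_data FROM events
     WHERE aggregate_id = $1 AND event_version > $2
     ORDER BY event_version`,
    [aggregateId, fromVersion]
  );
  for (const e of events.rows) {
    state = applyEvent(state, e.event_type, e.event_data);
  }
  return state;
}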
3. Soft Delete Pattern
Maintain data history and enable recovery:
-- Add soft delete columns to all tables
ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP DEFAULT NULL;
ALTER TABLE posts ADD COLUMN deleted_at TIMESTAMP DEFAULT NULL;
-- Create views for active records
CREATE VIEW active_users AS
SELECT * FROM users WHERE deleted_at IS NULL;
CREATE VIEW active_posts AS
SELECT * FROM posts WHERE deleted_at IS NULL;
-- Soft delete function
CREATE OR REPLACE FUNCTION soft_delete(table_name TEXT, record_id UUID)
RETURNS VOID AS $$
BEGIN
EXECUTE format('UPDATE %I SET deleted_at = NOW() WHERE id = $1', table_name)
USING record_id;
END;
$$ LANGUAGE plpgsql;
-- Usage
SELECT soft_delete('users', '123e4567-e89b-12d3-a456-426614174000');
-- Restore function
CREATE OR REPLACE FUNCTION restore_record(table_name TEXT, record_id UUID)
RETURNS VOID AS $$
BEGIN
EXECUTE format('UPDATE %I SET deleted_at = NULL WHERE id = $1', table_name)
USING record_id;
END;
$$ LANGUAGE plpgsql;
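From the application layer the pattern stays transparent: reads go through the active_* views and deletes go through soft_delete(). One caveat worth noting is that UNIQUE constraints still see soft-deleted rows, so partial unique indexes (e.g. on email WHERE deleted_at IS NULL) are a common companion. A small usage sketch, again assuming a node-postgres Pool (the helper names are illustrative):
import { Pool } from "pg";

const pool = new Pool(); // connection settings omitted

// Reads only ever touch the view, so soft-deleted users never leak out.
async function listActiveUsers() {
  const { rows } = await pool.query("SELECT id, email, username FROM active_users");
  return rows;
}

// "Deleting" a user delegates to the soft_delete() function defined above.
async function deactivateUser(userId: string) {
  await pool.query("SELECT soft_delete('users', $1)", [userId]);
}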
NoSQL Design Patterns
1. Document-Based User Profiles
Flexible schema for varying user data:
// MongoDB document design
{
_id: ObjectId("..."),
userId: "123e4567-e89b-12d3-a456-426614174000",
email: "john@example.com",
username: "johndoe",
profile: {
personal: {
firstName: "John",
lastName: "Doe",
dateOfBirth: "1990-05-15",
location: {
city: "New York",
country: "USA",
coordinates: [-74.0060, 40.7128] // [longitude, latitude], the order a 2dsphere index expects
}
},
preferences: {
theme: "dark",
language: "en",
notifications: {
email: true,
push: false,
sms: true
}
},
social: {
twitter: "@johndoe",
linkedin: "linkedin.com/in/johndoe",
website: "johndoe.com"
}
},
metadata: {
lastLogin: new Date("2025-02-07T10:30:00Z"),
loginCount: 42,
accountVerified: true,
twoFactorEnabled: true
},
tags: ["developer", "javascript", "react"],
createdAt: new Date("2024-01-15T08:00:00Z"),
updatedAt: new Date("2025-02-07T10:30:00Z")
}
// Optimized indexes
db.users.createIndex({ "email": 1 }, { unique: true })
db.users.createIndex({ "username": 1 }, { unique: true })
db.users.createIndex({ "profile.personal.location.coordinates": "2dsphere" })
db.users.createIndex({ "tags": 1 })
db.users.createIndex({ "createdAt": 1 })
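Two things this layout buys you in practice: nested fields can be updated individually with dot notation, and the 2dsphere index supports proximity queries. A short sketch using the official mongodb Node.js driver; the connection string and database name are placeholders:
import { MongoClient } from "mongodb";

async function exampleQueries() {
  const client = await MongoClient.connect("mongodb://localhost:27017");
  const users = client.db("app").collection("users");

  // Dot notation updates one nested field without rewriting the whole profile.
  await users.updateOne(
    { userId: "123e4567-e89b-12d3-a456-426614174000" },
    { $set: { "profile.preferences.theme": "light", updatedAt: new Date() } }
  );

  // The 2dsphere index supports proximity queries on the stored coordinates.
  const nearby = await users
    .find({
      "profile.personal.location.coordinates": {
        $near: {
          $geometry: { type: "Point", coordinates: [-74.006, 40.7128] }, // [lng, lat]
          $maxDistance: 50_000, // meters
        },
      },
    })
    .limit(10)
    .toArray();

  await client.close();
  return nearby;
}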
2. Time-Series Data Pattern
Efficient storage for analytics and monitoring:
// Time-bucketed documents for user activity
{
_id: ObjectId("..."),
userId: "123e4567-e89b-12d3-a456-426614174000",
date: "2025-02-07", // Bucket by day
hour: 10, // Sub-bucket by hour
events: [
{
timestamp: new Date("2025-02-07T10:15:23Z"),
type: "page_view",
data: {
page: "/dashboard",
referrer: "/login",
userAgent: "Mozilla/5.0...",
sessionId: "sess_123abc"
}
},
{
timestamp: new Date("2025-02-07T10:18:45Z"),
type: "button_click",
data: {
element: "export_button",
page: "/dashboard",
sessionId: "sess_123abc"
}
}
],
metrics: {
totalEvents: 2,
uniquePages: 1,
sessionDuration: 180000 // milliseconds
}
}
// Indexes for time-series queries
db.user_activity.createIndex({ "userId": 1, "date": 1 })
db.user_activity.createIndex({ "date": 1, "hour": 1 })
db.user_activity.createIndex({ "events.timestamp": 1 })
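Writes fold each new event into the current day/hour bucket with a single upsert. A sketch using the mongodb driver; the recordEvent helper is illustrative, not part of the original design:
import { MongoClient } from "mongodb";

// Append an event to the user's current day/hour bucket, creating the bucket
// document on the first event of that hour.
async function recordEvent(client: MongoClient, userId: string, event: { type: string; data: any }) {
  const now = new Date();
  await client.db("app").collection("user_activity").updateOne(
    {
      userId,
      date: now.toISOString().slice(0, 10), // e.g. "2025-02-07"
      hour: now.getUTCHours(),
    },
    {
      $push: { events: { timestamp: now, ...event } }, // append to the bucket
      $inc: { "metrics.totalEvents": 1 },              // keep the rollup current
    },
    { upsert: true }
  );
}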
3. Aggregation Pipeline for Analytics
Complex data transformations and aggregations:
// User engagement analytics pipeline
const engagementPipeline = [
// Match date range
{
$match: {
date: {
$gte: "2025-02-01",
$lte: "2025-02-07",
},
},
},
// Unwind events array
{ $unwind: "$events" },
// Group by user and event type
{
$group: {
_id: {
userId: "$userId",
eventType: "$events.type",
},
count: { $sum: 1 },
firstEvent: { $min: "$events.timestamp" },
lastEvent: { $max: "$events.timestamp" },
},
},
// Calculate engagement score
{
$addFields: {
engagementScore: {
$switch: {
branches: [
{ case: { $eq: ["$_id.eventType", "page_view"] }, then: { $multiply: ["$count", 1] } },
{ case: { $eq: ["$_id.eventType", "button_click"] }, then: { $multiply: ["$count", 2] } },
{ case: { $eq: ["$_id.eventType", "form_submit"] }, then: { $multiply: ["$count", 5] } },
],
default: 0,
},
},
},
},
// Group back by user
{
$group: {
_id: "$_id.userId",
totalEngagement: { $sum: "$engagementScore" },
eventTypes: {
$push: {
type: "$_id.eventType",
count: "$count",
score: "$engagementScore",
},
},
},
},
// Sort by engagement
{ $sort: { totalEngagement: -1 } },
// Limit results
{ $limit: 100 },
];
// Execute aggregation
const results = await db.user_activity.aggregate(engagementPipeline).toArray();
Hybrid Approaches
1. Polyglot Persistence
Using different databases for different data types:
// Service layer handling multiple databases
class UserService {
constructor(
private postgresClient: Pool,
private mongoClient: MongoClient,
private redisClient: RedisClient
) {}
async createUser(userData: CreateUserData) {
// Store core user data in PostgreSQL
const userResult = await this.postgresClient.query(
`
INSERT INTO users (email, username, password_hash)
VALUES ($1, $2, $3)
RETURNING id, email, username, created_at
`,
[userData.email, userData.username, userData.passwordHash]
);
const user = userResult.rows[0];
// Store flexible profile data in MongoDB
await this.mongoClient.db("app").collection("user_profiles").insertOne({
userId: user.id,
profile: userData.profile,
preferences: userData.preferences,
createdAt: new Date(),
});
// Cache frequently accessed data in Redis
await this.redisClient.setex(
`user:${user.id}`,
3600, // 1 hour TTL
JSON.stringify({
id: user.id,
email: user.email,
username: user.username,
})
);
return user;
}
async getUserById(userId: string) {
// Try cache first
const cached = await this.redisClient.get(`user:${userId}`);
if (cached) {
return JSON.parse(cached);
}
// Fetch from both databases
const [userResult, profileResult] = await Promise.all([
this.postgresClient.query("SELECT * FROM users WHERE id = $1", [userId]),
this.mongoClient.db("app").collection("user_profiles").findOne({ userId }),
]);
if (!userResult.rows[0]) return null;
const user = {
...userResult.rows[0],
profile: profileResult?.profile,
preferences: profileResult?.preferences,
};
// Cache the result
await this.redisClient.setex(`user:${userId}`, 3600, JSON.stringify(user));
return user;
}
}
2. CQRS (Command Query Responsibility Segregation)
Separate read and write models:
// Write model (commands)
interface CreatePostCommand {
authorId: string;
title: string;
content: string;
tags: string[];
}
class PostWriteService {
async createPost(command: CreatePostCommand) {
// Validate command
await this.validateCommand(command);
// Store in write database (normalized)
const postId = await this.writeDb.transaction(async (trx) => {
const postResult = await trx.query(
`
INSERT INTO posts (author_id, title, content, status)
VALUES ($1, $2, $3, 'draft')
RETURNING id
`,
[command.authorId, command.title, command.content]
);
const postId = postResult.rows[0].id;
// Insert tags
for (const tag of command.tags) {
await trx.query(
`
INSERT INTO post_tags (post_id, tag_name)
VALUES ($1, $2)
ON CONFLICT DO NOTHING
`,
[postId, tag]
);
}
return postId;
});
// Publish event for read model update
await this.eventBus.publish("PostCreated", {
postId,
authorId: command.authorId,
title: command.title,
content: command.content,
tags: command.tags,
createdAt: new Date(),
});
return postId;
}
}
// Read model (queries)
class PostReadService {
async getPostsWithAuthors(filters: PostFilters) {
// Query optimized read model
return await this.readDb
.collection("post_views")
.find({
...this.buildFilters(filters),
status: "published",
})
.sort({ publishedAt: -1 })
.limit(20)
.toArray();
}
async getPostDetails(postId: string) {
// Get from read model with all data denormalized
return await this.readDb.collection("post_details").findOne({ postId });
}
}
// Event handler to update read models
class PostEventHandler {
async handlePostCreated(event: PostCreatedEvent) {
const author = await this.getAuthor(event.authorId);
// Create denormalized read model
await this.readDb.collection("post_views").insertOne({
postId: event.postId,
title: event.title,
excerpt: this.createExcerpt(event.content),
authorId: event.authorId,
authorName: author.name,
authorUsername: author.username,
authorAvatar: author.avatar,
tags: event.tags,
createdAt: event.createdAt,
status: "draft",
});
}
}
Performance Optimization Patterns
1. Database Connection Pooling
// PostgreSQL connection pool configuration
const pool = new Pool({
host: process.env.DB_HOST,
port: parseInt(process.env.DB_PORT || "5432"),
database: process.env.DB_NAME,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
// Pool configuration
min: 5, // Minimum idle connections kept open
max: 20, // Maximum connections in the pool
idleTimeoutMillis: 30000, // Close connections idle longer than 30s
connectionTimeoutMillis: 2000, // Max wait when acquiring a connection
query_timeout: 20000, // Per-query timeout in milliseconds
// SSL configuration
ssl: process.env.NODE_ENV === "production" ? { rejectUnauthorized: false } : false,
});
// Connection health monitoring
pool.on("connect", (client) => {
console.log("New client connected to database");
});
pool.on("error", (err) => {
console.error("Database pool error:", err);
// Implement alerting/monitoring
});
// Graceful shutdown
process.on("SIGINT", async () => {
console.log("Closing database pool...");
await pool.end();
process.exit(0);
});
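For everyday work, pool.query() checks a client out and returns it automatically; reserve an explicit checkout for multi-statement transactions, and always release the client in a finally block. A sketch using the tables from the relational section above (the helper itself is illustrative):
async function createUserWithProfile(email: string, username: string, firstName: string) {
  const client = await pool.connect();
  try {
    await client.query("BEGIN");
    const { rows } = await client.query(
      "INSERT INTO users (email, username) VALUES ($1, $2) RETURNING id",
      [email, username]
    );
    await client.query(
      "INSERT INTO user_profiles (user_id, first_name) VALUES ($1, $2)",
      [rows[0].id, firstName]
    );
    await client.query("COMMIT");
    return rows[0].id;
  } catch (err) {
    await client.query("ROLLBACK"); // keep the connection usable after a failure
    throw err;
  } finally {
    client.release(); // never leak connections back to the pool
  }
}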
2. Query Optimization Strategies
-- Before: N+1 query problem
-- SELECT * FROM posts WHERE author_id = ?;
-- For each post: SELECT * FROM users WHERE id = ?;
-- After: Single optimized query with JOIN
SELECT
p.id,
p.title,
p.content,
p.created_at,
u.username,
u.email,
up.first_name,
up.last_name,
up.avatar_url
FROM posts p
JOIN users u ON p.author_id = u.id
LEFT JOIN user_profiles up ON u.id = up.user_id
WHERE p.status = 'published'
ORDER BY p.created_at DESC
LIMIT 20;
-- Add covering index for optimal performance
CREATE INDEX idx_posts_published_with_author ON posts(status, created_at)
INCLUDE (id, title, content, author_id)
WHERE status = 'published';
-- Materialized view for complex aggregations
CREATE MATERIALIZED VIEW popular_posts AS
SELECT
p.id,
p.title,
p.author_id,
u.username,
-- DISTINCT prevents the two LEFT JOINs from inflating each other's counts
COUNT(DISTINCT pl.id) as like_count,
COUNT(DISTINCT pc.id) as comment_count,
COUNT(DISTINCT pl.id) + COUNT(DISTINCT pc.id) * 2 as popularity_score
FROM posts p
JOIN users u ON p.author_id = u.id
LEFT JOIN post_likes pl ON p.id = pl.post_id
LEFT JOIN post_comments pc ON p.id = pc.post_id
WHERE p.status = 'published'
GROUP BY p.id, p.title, p.author_id, u.username
ORDER BY popularity_score DESC;
-- CONCURRENTLY requires a unique index on the materialized view
CREATE UNIQUE INDEX idx_popular_posts_id ON popular_posts (id);
-- Refresh strategy (can be automated)
REFRESH MATERIALIZED VIEW CONCURRENTLY popular_posts;
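The refresh can also be driven from the application process. A minimal sketch using the pool configured in the previous subsection; the 5-minute interval is arbitrary, and pg_cron or a dedicated job runner are equally valid choices:
const REFRESH_INTERVAL_MS = 5 * 60 * 1000;

// Rebuild popular_posts in the background without blocking readers;
// CONCURRENTLY relies on the unique index created above.
setInterval(async () => {
  try {
    await pool.query("REFRESH MATERIALIZED VIEW CONCURRENTLY popular_posts");
  } catch (err) {
    console.error("Failed to refresh popular_posts:", err);
  }
}, REFRESH_INTERVAL_MS);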
3. Caching Patterns
class CacheService {
private redis: RedisClient;
private localCache: LRUCache<string, any>;
constructor(redis: RedisClient) {
this.redis = redis;
this.localCache = new LRUCache({
max: 1000,
ttl: 1000 * 60 * 5, // 5 minutes local cache
});
}
// Multi-level cache: Local -> Redis -> Database
async get<T>(key: string, fetchFn: () => Promise<T>, ttl: number = 3600): Promise<T> {
// Level 1: Local cache
let value = this.localCache.get(key);
if (value !== undefined) return value; // treat cached falsy values as hits too
// Level 2: Redis cache
const cached = await this.redis.get(key);
if (cached) {
value = JSON.parse(cached);
this.localCache.set(key, value);
return value;
}
// Level 3: Database
value = await fetchFn();
// Cache in both levels
await this.redis.setex(key, ttl, JSON.stringify(value));
this.localCache.set(key, value);
return value;
}
// Cache-aside pattern with tags for invalidation
async getWithTags<T>(key: string, tags: string[], fetchFn: () => Promise<T>, ttl: number = 3600): Promise<T> {
const value = await this.get(key, fetchFn, ttl);
// Associate cache key with tags for mass invalidation
const multi = this.redis.multi();
tags.forEach((tag) => {
multi.sadd(`tag:${tag}`, key);
multi.expire(`tag:${tag}`, ttl);
});
await multi.exec();
return value;
}
// Invalidate by tags
async invalidateByTag(tag: string): Promise<void> {
const keys = await this.redis.smembers(`tag:${tag}`);
if (keys.length > 0) {
const multi = this.redis.multi();
keys.forEach((key) => {
multi.del(key);
this.localCache.delete(key);
});
multi.del(`tag:${tag}`);
await multi.exec();
}
}
}
// Usage example
const cacheService = new CacheService(redisClient);
async function getUser(userId: string) {
return await cacheService.getWithTags(
`user:${userId}`,
["users", `user:${userId}`],
() => database.getUserById(userId),
3600 // 1 hour TTL
);
}
// Invalidate when user is updated
async function updateUser(userId: string, updates: UserUpdates) {
await database.updateUser(userId, updates);
await cacheService.invalidateByTag(`user:${userId}`);
}
Conclusion
Modern database design requires a thoughtful approach that balances normalization with performance, consistency with scalability, and simplicity with flexibility. Key principles to remember:
- Start with good normalization, then denormalize strategically
- Choose the right tool for each data type and access pattern
- Plan for scale from the beginning with proper indexing and caching
- Monitor and optimize continuously based on actual usage patterns
- Design for maintainability with clear patterns and documentation
The patterns covered in this guide provide a solid foundation for building robust, scalable database systems that can grow with your application's needs.