#SQL详解之DML - 数据操作语言完整指南
#引言
数据操作语言(Data Manipulation Language,DML)是SQL语言的核心组成部分,用于查询、插入、更新和删除数据库中的数据。DML包括SELECT、INSERT、UPDATE、DELETE等命令,是日常数据库操作中最频繁使用的部分。本文将详细介绍DML的各种命令,并对比MySQL和PostgreSQL在DML实现上的差异。
#1. DML基础概念
#1.1 什么是DML?
DML(Data Manipulation Language)是SQL语言中用于操作数据库中数据的部分,主要包括:
- SELECT:查询数据
- INSERT:插入数据
- UPDATE:更新数据
- DELETE:删除数据
DML语句通常在事务中执行,可以回滚(取决于数据库配置)。
#1.2 DML与DDL的区别
| 语言类型 | 主要命令 | 用途 | 事务特性 |
|---|---|---|---|
| DML | SELECT, INSERT, UPDATE, DELETE | 操作数据 | 需手动提交 |
| DDL | CREATE, ALTER, DROP | 定义/修改数据库结构 | 自动提交 |
#2. SELECT语句详解
#2.1 基础查询
#MySQL和PostgreSQL通用语法:
-- 基本查询
SELECT column1, column2, column3
FROM table_name;
-- 查询所有列
SELECT *
FROM employees;
-- 使用别名
SELECT
first_name AS fname,
last_name AS lname,
salary AS monthly_salary
FROM employees;
-- 去重查询
SELECT DISTINCT department_id
FROM employees;
-- 条件查询
SELECT employee_id, first_name, last_name
FROM employees
WHERE salary > 50000
AND hire_date >= '2020-01-01';#2.2 高级查询功能
#MySQL特定功能:
-- LIMIT子句(MySQL原生支持)
SELECT employee_id, first_name, last_name
FROM employees
ORDER BY salary DESC
LIMIT 10 OFFSET 5; -- 跳过5行,取10行
-- 联合查询
SELECT first_name, last_name FROM employees
UNION
SELECT supplier_name, contact_name FROM suppliers;
-- MySQL特有的全文搜索
SELECT * FROM articles
WHERE MATCH(title, body) AGAINST('database' IN NATURAL LANGUAGE MODE);#PostgreSQL特定功能:
-- LIMIT和OFFSET(PostgreSQL支持)
SELECT employee_id, first_name, last_name
FROM employees
ORDER BY salary DESC
LIMIT 10 OFFSET 5;
-- PostgreSQL特有的窗口函数
SELECT
employee_id,
first_name,
last_name,
salary,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC) as rank_in_dept
FROM employees;
-- PostgreSQL的CTE(公用表表达式)
WITH high_earners AS (
SELECT employee_id, first_name, last_name, salary
FROM employees
WHERE salary > 75000
)
SELECT h.*, d.department_name
FROM high_earners h
JOIN departments d ON h.employee_id = d.manager_id;
-- PostgreSQL数组操作
SELECT employee_id, skills[1] as primary_skill
FROM employee_profiles
WHERE 'Python' = ANY(skills);#2.3 连接查询
-- 内连接(MySQL和PostgreSQL通用)
SELECT e.first_name, e.last_name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.department_id;
-- 左连接
SELECT e.first_name, e.last_name, COALESCE(d.department_name, 'No Department') as dept_name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.department_id;
-- 复杂连接查询
SELECT
e.first_name,
e.last_name,
d.department_name,
p.project_name,
a.assignment_date
FROM employees e
LEFT JOIN departments d ON e.department_id = d.department_id
LEFT JOIN assignments a ON e.employee_id = a.employee_id
LEFT JOIN projects p ON a.project_id = p.project_id
WHERE e.status = 'active'
ORDER BY d.department_name, e.last_name;#3. INSERT语句详解
#3.1 基础插入
#MySQL插入示例:
-- 单行插入
INSERT INTO employees (first_name, last_name, email, hire_date, salary, department_id)
VALUES ('John', 'Doe', 'john.doe@company.com', '2024-01-15', 65000.00, 1);
-- 批量插入
INSERT INTO employees (first_name, last_name, email, hire_date, salary, department_id)
VALUES
('Jane', 'Smith', 'jane.smith@company.com', '2024-01-16', 62000.00, 2),
('Bob', 'Johnson', 'bob.johnson@company.com', '2024-01-17', 58000.00, 1),
('Alice', 'Brown', 'alice.brown@company.com', '2024-01-18', 70000.00, 3);
-- 从查询结果插入
INSERT INTO employee_backup (employee_id, first_name, last_name, salary)
SELECT employee_id, first_name, last_name, salary
FROM employees
WHERE hire_date >= '2024-01-01';
-- MySQL特有:ON DUPLICATE KEY UPDATE
INSERT INTO employees (employee_id, first_name, last_name, salary)
VALUES (1, 'Updated', 'Name', 70000)
ON DUPLICATE KEY UPDATE
first_name = VALUES(first_name),
last_name = VALUES(last_name),
salary = VALUES(salary);#PostgreSQL插入示例:
-- 单行插入
INSERT INTO employees (first_name, last_name, email, hire_date, salary, department_id)
VALUES ('John', 'Doe', 'john.doe@company.com', '2024-01-15', 65000.00, 1);
-- 批量插入
INSERT INTO employees (first_name, last_name, email, hire_date, salary, department_id)
VALUES
('Jane', 'Smith', 'jane.smith@company.com', '2024-01-16', 62000.00, 2),
('Bob', 'Johnson', 'bob.johnson@company.com', '2024-01-17', 58000.00, 1),
('Alice', 'Brown', 'alice.brown@company.com', '2024-01-18', 70000.00, 3);
-- PostgreSQL特有:RETURNING子句
INSERT INTO employees (first_name, last_name, email, hire_date, salary, department_id)
VALUES ('New', 'Employee', 'new.employee@company.com', '2024-01-19', 55000.00, 1)
RETURNING employee_id, first_name, last_name;
-- PostgreSQL特有:ON CONFLICT
INSERT INTO employees (employee_id, first_name, last_name, salary)
VALUES (1, 'Updated', 'Name', 70000)
ON CONFLICT (employee_id)
DO UPDATE SET
first_name = EXCLUDED.first_name,
last_name = EXCLUDED.last_name,
salary = EXCLUDED.salary;#3.2 高级插入技术
-- MySQL:批量插入优化
INSERT INTO large_table (col1, col2, col3)
SELECT col1, col2, col3
FROM source_table
WHERE condition = 'value';
-- PostgreSQL:COPY命令(高性能批量插入)
-- 注意:这是在psql命令行中的用法
-- COPY employees FROM '/path/to/file.csv' DELIMITER ',' CSV HEADER;
-- 条件插入
INSERT INTO audit_log (table_name, operation, record_id, timestamp)
SELECT 'employees', 'INSERT', employee_id, NOW()
FROM inserted_records
WHERE action = 'insert';#4. UPDATE语句详解
#4.1 基础更新
#MySQL更新示例:
-- 简单更新
UPDATE employees
SET salary = salary * 1.1
WHERE department_id = 1;
-- 多列更新
UPDATE employees
SET
salary = salary * 1.05,
last_modified = NOW()
WHERE status = 'active';
-- 条件更新
UPDATE employees
SET bonus = CASE
WHEN salary > 80000 THEN 5000
WHEN salary > 60000 THEN 3000
ELSE 1000
END
WHERE hire_date <= '2023-01-01';
-- 基于其他表的更新(MySQL 5.7+)
UPDATE employees e
JOIN departments d ON e.department_id = d.department_id
SET e.salary = e.salary * 1.05
WHERE d.department_name = 'Engineering';#PostgreSQL更新示例:
-- 简单更新
UPDATE employees
SET salary = salary * 1.1
WHERE department_id = 1;
-- 多列更新
UPDATE employees
SET
salary = salary * 1.05,
last_modified = NOW()
WHERE status = 'active';
-- 条件更新
UPDATE employees
SET bonus = CASE
WHEN salary > 80000 THEN 5000
WHEN salary > 60000 THEN 3000
ELSE 1000
END
WHERE hire_date <= '2023-01-01';
-- 基于其他表的更新(PostgreSQL风格)
UPDATE employees
SET salary = employees.salary * 1.05
FROM departments
WHERE employees.department_id = departments.department_id
AND departments.department_name = 'Engineering';
-- PostgreSQL特有:RETURNING子句
UPDATE employees
SET last_login = NOW()
WHERE employee_id = 123
RETURNING employee_id, first_name, last_name, last_login;#4.2 高级更新技术
-- 行转列更新示例
UPDATE sales_summary
SET
Q1_sales = (SELECT SUM(amount) FROM sales WHERE quarter = 'Q1' AND year = 2024),
Q2_sales = (SELECT SUM(amount) FROM sales WHERE quarter = 'Q2' AND year = 2024),
Q3_sales = (SELECT SUM(amount) FROM sales WHERE quarter = 'Q3' AND year = 2024),
Q4_sales = (SELECT SUM(amount) FROM sales WHERE quarter = 'Q4' AND year = 2024)
WHERE year = 2024;
-- 使用CTE进行复杂更新
WITH updated_salaries AS (
SELECT
employee_id,
salary * (1 + bonus_percentage) as new_salary
FROM employees e
JOIN performance_reviews pr ON e.employee_id = pr.employee_id
WHERE pr.review_year = 2024
)
UPDATE employees
SET salary = updated_salaries.new_salary
FROM updated_salaries
WHERE employees.employee_id = updated_salaries.employee_id;#5. DELETE语句详解
#5.1 基础删除
#MySQL删除示例:
-- 简单删除
DELETE FROM employees
WHERE status = 'terminated';
-- 带条件删除
DELETE FROM employees
WHERE hire_date < '2020-01-01' AND salary < 40000;
-- 基于其他表的删除
DELETE e FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE d.department_name = 'Closed Department';
-- 清空表(保留结构)
TRUNCATE TABLE temp_data;
-- 等同于 DELETE FROM temp_data; 但更快且重置AUTO_INCREMENT#PostgreSQL删除示例:
-- 简单删除
DELETE FROM employees
WHERE status = 'terminated';
-- 带条件删除
DELETE FROM employees
WHERE hire_date < '2020-01-01' AND salary < 40000;
-- 基于其他表的删除
DELETE FROM employees
USING departments
WHERE employees.department_id = departments.department_id
AND departments.department_name = 'Closed Department';
-- PostgreSQL特有:RETURNING子句
DELETE FROM employees
WHERE status = 'terminated'
RETURNING employee_id, first_name, last_name, termination_date;
-- 清空表
TRUNCATE TABLE temp_data RESTART IDENTITY CASCADE;#5.2 高级删除技术
-- 分批删除大数据集(防止锁表时间过长)
-- MySQL示例
DELETE FROM logs
WHERE log_date < '2023-01-01'
LIMIT 10000;
-- 重复执行直到删除完所有记录
-- PostgreSQL分批删除
DO $$
DECLARE
deleted_count INTEGER;
BEGIN
LOOP
DELETE FROM logs
WHERE log_date < '2023-01-01'
LIMIT 10000;
GET DIAGNOSTICS deleted_count = ROW_COUNT;
EXIT WHEN deleted_count = 0;
PERFORM pg_sleep(0.1); -- 短暂休眠,减少系统负载
END LOOP;
END $$;#6. 事务处理
#6.1 事务基础
-- MySQL事务示例
START TRANSACTION;
UPDATE accounts
SET balance = balance - 1000
WHERE account_id = 1;
UPDATE accounts
SET balance = balance + 1000
WHERE account_id = 2;
-- 检查操作结果
SELECT balance FROM accounts WHERE account_id IN (1, 2);
-- 如果一切正常则提交
COMMIT;
-- 如果出现问题则回滚
-- ROLLBACK;
-- 设置事务隔离级别
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;-- PostgreSQL事务示例
BEGIN;
UPDATE accounts
SET balance = balance - 1000
WHERE account_id = 1;
UPDATE accounts
SET balance = balance + 1000
WHERE account_id = 2;
-- 检查操作结果
SELECT balance FROM accounts WHERE account_id IN (1, 2);
-- 提交事务
COMMIT;
-- 或者回滚事务
-- ROLLBACK;
-- PostgreSQL特有:保存点
BEGIN;
INSERT INTO employees VALUES (1, 'John', 'Doe', 50000);
SAVEPOINT sp1;
UPDATE employees SET salary = 55000 WHERE employee_id = 1;
ROLLBACK TO sp1; -- 回滚到保存点
COMMIT;#6.2 锁机制
-- MySQL锁示例
-- 共享锁
SELECT * FROM employees WHERE employee_id = 1 LOCK IN SHARE MODE;
-- 排他锁
SELECT * FROM employees WHERE employee_id = 1 FOR UPDATE;
-- PostgreSQL锁示例
-- FOR UPDATE锁
BEGIN;
SELECT * FROM employees WHERE employee_id = 1 FOR UPDATE;
-- 其他会话将等待直到当前事务结束
-- FOR SHARE锁
SELECT * FROM employees WHERE employee_id = 1 FOR SHARE;#7. 性能优化技巧
#7.1 查询优化
-- 使用EXPLAIN分析查询性能
-- MySQL
EXPLAIN SELECT * FROM employees WHERE department_id = 1 AND salary > 50000;
-- PostgreSQL
EXPLAIN ANALYZE SELECT * FROM employees WHERE department_id = 1 AND salary > 50000;
-- 优化技巧
-- 1. 使用适当的索引
CREATE INDEX idx_emp_dept_salary ON employees(department_id, salary);
-- 2. 避免SELECT *
SELECT employee_id, first_name, last_name FROM employees WHERE department_id = 1;
-- 3. 优化JOIN操作
SELECT e.first_name, e.last_name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.department_id
WHERE e.salary > 50000;
-- 4. 使用LIMIT限制结果集
SELECT * FROM employees ORDER BY hire_date DESC LIMIT 100;#7.2 批量操作优化
-- MySQL批量操作优化
-- 1. 关闭自动提交进行批量操作
SET autocommit = 0;
START TRANSACTION;
INSERT INTO employees (first_name, last_name, email) VALUES ('A', 'B', 'a@b.com');
INSERT INTO employees (first_name, last_name, email) VALUES ('C', 'D', 'c@d.com');
-- ... 更多插入操作
COMMIT;
SET autocommit = 1;
-- 2. 使用批量插入
INSERT INTO employees (first_name, last_name, email)
SELECT first_name, last_name, email FROM staging_table;
-- PostgreSQL批量操作优化
-- 1. 使用COPY命令进行大数据量插入
-- COPY employees FROM '/path/to/data.csv' WITH (FORMAT csv, HEADER true);
-- 2. 使用批量更新
UPDATE employees
SET salary = new_values.new_salary
FROM (VALUES
(1, 60000),
(2, 65000),
(3, 70000)
) AS new_values(emp_id, new_salary)
WHERE employees.employee_id = new_values.emp_id;#8. MySQL与PostgreSQL DML差异总结
| 特性 | MySQL | PostgreSQL |
|---|---|---|
| 分页查询 | LIMIT | LIMIT + OFFSET |
| 插入冲突处理 | ON DUPLICATE KEY UPDATE | ON CONFLICT |
| 返回数据 | 不支持 | RETURNING子句 |
| 数组支持 | 不支持 | 原生支持 |
| 窗口函数 | 8.0+支持 | 原生支持 |
| CTE | 8.0+支持 | 原生支持 |
| 全文搜索 | MATCH...AGAINST | to_tsvector/to_tsquery |
| UPSERT | INSERT...ON DUPLICATE KEY | INSERT...ON CONFLICT |
#9. 最佳实践
#9.1 安全实践
-- 使用参数化查询防止SQL注入
-- Python示例
cursor.execute(
"SELECT * FROM employees WHERE department_id = %s AND salary > %s",
(dept_id, min_salary)
)
-- 避免在WHERE子句中使用函数
-- 不好的做法
SELECT * FROM employees WHERE YEAR(hire_date) = 2024;
-- 好的做法
SELECT * FROM employees WHERE hire_date >= '2024-01-01' AND hire_date < '2025-01-01';#9.2 性能实践
-- 1. 合理使用索引
-- 2. 避免在大表上使用SELECT *
-- 3. 优化WHERE条件顺序(将选择性高的条件放在前面)
-- 4. 使用EXISTS替代IN(在某些情况下)
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.department_id = e.department_id
AND d.location = 'New York'
);
-- 5. 使用JOIN替代子查询(在某些情况下)
SELECT e.*, d.department_name
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE d.location = 'New York';#相关教程
#总结
DML是数据库操作的核心,掌握各种DML命令对于数据库开发和管理至关重要。MySQL和PostgreSQL在DML实现上各有特色:
- MySQL:语法简洁,批量操作性能好,全文搜索功能强
- PostgreSQL:功能丰富,标准兼容性好,高级特性多
在实际应用中,应该根据具体需求和数据库特性选择最适合的DML语句,并遵循性能优化和安全最佳实践。理解两种数据库系统的差异有助于编写更高效的数据库应用程序。

