Slides: juanignaciosl.github.io/sql-performance-explained/slides
CREATE TABLE employees (
employee_id integer not null PRIMARY key,
subsidiary_id integer not null,
first_name text,
last_name text,
date_of_birth DATE NOT NULL,
phone_number character varying(1000) NOT NULL,
enabled boolean default true
);
create unique index on employees (employee_id, subsidiary_id);
create index employees_last_name on employees(last_name);
Table "public.employees"
Column | Type | Modifiers
---------------+-------------------------+--------------
employee_id | integer | not null
subsidiary_id | integer | not null
first_name | text |
last_name | text |
date_of_birth | date | not null
phone_number | character varying(1000) | not null
enabled | boolean | default true
Indexes:
"employees_pkey" PRIMARY KEY, btree (employee_id)
"enabled_employees" btree (last_name) WHERE enabled = true
"employees_last_name" btree (last_name)
explain analyze SELECT first_name, last_name
FROM employees WHERE employee_id = 123;
explain analyze SELECT first_name, last_name
FROM employees WHERE employee_id = 123;
Index Scan using employees_pkey on employees
(cost=0.29..8.31 rows=1 width=16)
(actual time=0.009..0.010 rows=1 loops=1)
Index Cond: (employee_id = 123)
Planning time: 0.310 ms
Execution time: 0.051 ms
explain analyze SELECT first_name, last_name
FROM employees WHERE employee_id = 111
and subsidiary_id = 333;
explain analyze SELECT first_name, last_name
FROM employees WHERE employee_id = 111
and subsidiary_id = 333;
Index Scan using employees_employee_id_subsidiary_id_idx
on employees (cost=0.29..8.31 rows=1 width=16)
(actual time=0.023..0.025 rows=1 loops=1)
Index Cond: ((employee_id = 111) AND (subsidiary_id = 333))
Planning time: 0.130 ms
Execution time: 0.056 ms
explain analyze SELECT first_name, last_name
FROM employees WHERE subsidiary_id = 333;
explain analyze SELECT first_name, last_name
FROM employees WHERE subsidiary_id = 333;
Index Scan using employees_employee_id_subsidiary_id_idx
on employees (cost=0.29..1858.30 rows=1 width=16)
(actual time=0.034..3.498 rows=1 loops=1)
Index Cond: (subsidiary_id = 333)
Planning time: 0.101 ms
Execution time: 3.527 ms
explain analyze SELECT first_name, last_name
FROM employees WHERE subsidiary_id = 333;
Index Scan using employees_employee_id_subsidiary_id_idx
on employees (cost=0.29..1858.30 rows=1 width=16)
(actual time=0.034..3.498 rows=1 loops=1)
Index Cond: (subsidiary_id = 333)
Planning time: 0.101 ms
Execution time: 3.527 ms
A multicolumn B-tree index can be used with query conditions that involve any subset of the index's columns, but the index is most efficient when there are constraints on the leading (leftmost) columns.
PostgreSQL manual
select * from employees where date_of_birth > TO_DATE($1, 'YYYY-MM-DD')
and date_of_birth < TO_DATE($2, 'YYYY-MM-DD') and subsidiary_id = $3;
create index employees_date on
employees(date_of_birth, subsidiary_id);
create index employees_date on
employees(subsidiary_id, date_of_birth);
Bitmap Heap Scan on employees
(cost=14.54..22.28 rows=2 width=592) (actual time=0.069..0.071 rows=4 loops=1)
Recheck Cond: ((date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date) AND (subsidiary_id = 50))
Heap Blocks: exact=3
-> Bitmap Index Scan on employees_date (cost=0.00..14.54 rows=2 width=0)
(actual time=0.064..0.064 rows=4 loops=1)
Index Cond: ((date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date) AND (subsidiary_id = 50))
Planning time: 0.365 ms
Execution time: 0.099 ms
Bitmap Heap Scan on employees
(cost=4.34..19.58 rows=4 width=35) (actual time=0.045..0.051 rows=4 loops=1)
Recheck Cond: ((subsidiary_id = 50) AND (date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date))
Heap Blocks: exact=3
-> Bitmap Index Scan on employees_date (cost=0.00..4.34 rows=4 width=0)
(actual time=0.037..0.037 rows=4 loops=1)
Index Cond: ((subsidiary_id = 50) AND (date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date))
Planning time: 0.481 ms
Execution time: 0.084 ms
Bitmap Heap Scan on employees
(cost=14.54..22.28 rows=2 width=592) (actual time=0.069..0.071 rows=4 loops=1)
Recheck Cond: ((date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date) AND (subsidiary_id = 50))
Heap Blocks: exact=3
-> Bitmap Index Scan on employees_date (cost=0.00..14.54 rows=2 width=0)
(actual time=0.064..0.064 rows=4 loops=1)
Index Cond: ((date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date) AND (subsidiary_id = 50))
Planning time: 0.365 ms
Execution time: 0.099 ms
Bitmap Heap Scan on employees
(cost=4.34..19.58 rows=4 width=35) (actual time=0.045..0.051 rows=4 loops=1)
Recheck Cond: ((subsidiary_id = 50) AND (date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date))
Heap Blocks: exact=3
-> Bitmap Index Scan on employees_date (cost=0.00..4.34 rows=4 width=0)
(actual time=0.037..0.037 rows=4 loops=1)
Index Cond: ((subsidiary_id = 50) AND (date_of_birth > '1082-01-01'::date)
AND (date_of_birth < '1083-01-01'::date))
Planning time: 0.481 ms
Execution time: 0.084 ms
The child plan node visits an index to find the locations of rows matching the index condition, and then the upper plan node actually fetches those rows from the table itself. Fetching rows separately is much more expensive than reading them sequentially, but because not all the pages of the table have to be visited, this is still cheaper than a sequential scan. PostgreSQL manual
explain analyze SELECT first_name, last_name
FROM employees WHERE last_name = 'LN 38'
Index Scan using employees_last_name on employees
(cost=0.42..8.44 rows=1 width=16)
(actual time=0.049..0.049 rows=1 loops=1)
Index Cond: (last_name = 'LN 38'::text)
Planning time: 0.286 ms
Execution time: 0.082 ms
explain analyze SELECT first_name, last_name
FROM employees WHERE upper(last_name) = 'LN 38'
Seq Scan on employees
(cost=0.00..2334.00 rows=500 width=16)
(actual time=0.048..46.647 rows=1 loops=1)
Filter: (upper(last_name) = 'LN 38'::text)
Rows Removed by Filter: 99999
Planning time: 0.092 ms
Execution time: 46.672 ms
create index employees_upper_last_name
on employees(upper(last_name));
explain analyze SELECT first_name, last_name
FROM employees WHERE upper(last_name) = 'LN 38'
Bitmap Heap Scan on employees
(cost=12.29..775.05 rows=500 width=16)
(actual time=0.070..0.070 rows=1 loops=1)
Recheck Cond: (upper(last_name) = 'LN 38'::text)
Heap Blocks: exact=1
-> Bitmap Index Scan on employees_upper_last_name
(cost=0.00..12.17 rows=500 width=0)
(actual time=0.065..0.065 rows=1 loops=1)
Index Cond: (upper(last_name) = 'LN 38'::text)
Planning time: 0.360 ms
Execution time: 0.091 ms
explain analyze select * from employees where employee_id = 5;
explain analyze select * from employees where employee_id + 1 = 6;
explain analyze select * from employees where employee_id = 5;
Index Scan using employees_pkey on employees (cost=0.29..8.31 rows=1 width=36) (actual time=0.014..0.015 rows=1 loops=1)
Index Cond: (employee_id = 5)
Planning time: 0.075 ms
Execution time: 0.034 ms
explain analyze select * from employees where employee_id + 1 = 6;
Seq Scan on employees (cost=0.00..2434.00 rows=500 width=36) (actual time=0.014..15.385 rows=1 loops=1)
Filter: ((employee_id + 1) = 6)
Rows Removed by Filter: 99999
Planning time: 0.066 ms
Execution time: 15.407 ms
create index enabled_employees on employees(last_name)
where enabled = 't';
int subsidiary_id;
PreparedStatement command = connection.prepareStatement(
"select first_name from employees where subsidiary_id = ?"
);
command.setInt(1, subsidiary_id);
explain plan
explain plan
SELECT first_name, last_name, subsidiary_id, phone_number
FROM employees
WHERE subsidiary_id = ?
AND UPPER(last_name) LIKE '%INA%';
--------------------------------------------------------------
|Id | Operation | Name | Rows | Cost |
--------------------------------------------------------------
| 0 | SELECT STATEMENT | | 17 | 230 |
|*1 | TABLE ACCESS BY INDEX ROWID| EMPLOYEES | 17 | 230 |
|*2 | INDEX RANGE SCAN | EMPLOYEE_PK| 333 | 2 |
--------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter(UPPER("LAST_NAME") LIKE '%INA%')
2 - access("SUBSIDIARY_ID"=TO_NUMBER(:A))
CREATE INDEX empsubupnam ON employees
(subsidiary_id, UPPER(last_name));
CREATE INDEX empsubupnam ON employees
(subsidiary_id, UPPER(last_name));
--------------------------------------------------------------
|Id | Operation | Name | Rows | Cost |
--------------------------------------------------------------
| 0 | SELECT STATEMENT | | 17 | 20 |
| 1 | TABLE ACCESS BY INDEX ROWID| EMPLOYEES | 17 | 20 |
|*2 | INDEX RANGE SCAN | EMPSUBUPNAM| 17 | 3 |
--------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("SUBSIDIARY_ID"=TO_NUMBER(:A))
filter(UPPER("LAST_NAME") LIKE '%INA%')
explain select subsidiary_id, last_name
from employees where subsidiary_id < 5;
Bitmap Heap Scan on employees
(cost=100.20..1089.58 rows=5150 width=12)
(actual time=1.102..3.429 rows=5000 loops=1)
Recheck Cond: (subsidiary_id < 5)
Heap Blocks: exact=925
-> Bitmap Index Scan on employees_subsidiary_id_employee_id_idx
(cost=0.00..98.92 rows=5150 width=0)
(actual time=0.853..0.853 rows=5000 loops=1)
Index Cond: (subsidiary_id < 5)
Planning time: 0.178 ms
Execution time: 3.867 ms
CREATE INDEX empsubupnam ON employees
(subsidiary_id, last_name);
CREATE INDEX empsubupnam ON employees
(subsidiary_id, last_name);
explain select subsidiary_id, last_name
from employees where subsidiary_id < 5;
Index Only Scan using empsubupnam on employees
(cost=0.42..170.54 rows=5150 width=12)
(actual time=0.109..0.912 rows=5000 loops=1)
Index Cond: (subsidiary_id < 5)
Heap Fetches: 0
Planning time: 0.091 ms
Execution time: 1.244 ms
explain analyze select * from sales order by sale_date;
Sort (cost=122200.04..124450.04 rows=900000 width=24)
(actual time=810.872..1007.881 rows=900000 loops=1)
Sort Key: sale_date
Sort Method: external merge Disk: 29912kB
-> Seq Scan on sales
(cost=0.00..14733.00 rows=900000 width=24)
(actual time=0.006..113.583 rows=900000 loops=1)
Planning time: 0.112 ms
Execution time: 1074.673 ms
create index sales_date on sales(sale_date);
explain analyze select * from sales order by sale_date;
create index sales_date on sales(sale_date);
explain analyze select * from sales order by sale_date;
Index Scan using sales_date on sales
(cost=0.42..29120.43 rows=900000 width=24)
(actual time=0.024..247.604 rows=900000 loops=1)
Planning time: 0.118 ms
Execution time: 315.568 ms
TL;DR: cost is not always an accurate measure
For a query that requires scanning a large fraction of the table, an explicit sort is likely to be faster than using an index because it requires less disk I/O due to following a sequential access pattern
Cost can increase because clustering factor of the new index is worse
select tablename, attname, correlation from pg_stats where
schemaname = 'public';
tablename | attname | correlation
-----------+---------------+-------------
employees | date_of_birth | 0.976956
employees | phone_number | 0.801827
employees | enabled | 0.935313
sales | employee_id | 1
sales | subsidiary_id | 0.0180426
sales | sale_id | 1
sales | amount | 1
sales | sale_date | -1
employees | employee_id | 0.976954
employees | subsidiary_id | -0.022466
employees | first_name | 0.801827
employees | last_name | 0.801827
LIMIT { count | ALL }
OFFSET start
SQL:2008
OFFSET start { ROW | ROWS }
FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY
SELECT * FROM sales ORDER BY sale_date DESC OFFSET 10
FETCH NEXT 10 ROWS ONLY;
SELECT * FROM sales WHERE sale_date < ?
ORDER BY sale_date DESC FETCH FIRST 10 ROWS ONLY;
PREPARE stmt(int) AS SELECT $1;
PREPARE stmt(int) AS SELECT $1;
EXPLAIN EXECUTE stmt(1);
PREPARE stmt(int) AS SELECT $1;
EXPLAIN EXECUTE stmt(1);
Result (cost=0.00..0.01 rows=1 width=0)
PREPARE stmt(int) AS SELECT $1;
EXPLAIN EXECUTE stmt(1);
Result (cost=0.00..0.01 rows=1 width=0)
DEALLOCATE stmt;
EXPLAIN ANALYZE EXECUTE stmt(1);
Careful, It actually runs the query!
EXPLAIN ANALYZE UPDATE EMPLOYEES
SET LAST_NAME = 'x' WHERE EMPLOYEE_ID = 1;
Update on employees (cost=0.29..8.31 rows=1 width=34)
(actual time=0.261..0.261 rows=0 loops=1)
-> Index Scan using employees_pkey on employees
(cost=0.29..8.31 rows=1 width=34)
(actual time=0.136..0.138 rows=1 loops=1)
Index Cond: (employee_id = 1)
Planning time: 0.095 ms
Execution time: 0.297 ms
PostgreSQL execution plans do not show index access and filter predicates separately—both show up as “Index Cond”. That means the execution plan must be compared to the index definition to differentiate access predicates from index filter predicates.
CREATE TABLE scale_data (
section NUMERIC NOT NULL,
id1 NUMERIC NOT NULL,
id2 NUMERIC NOT NULL
);
CREATE INDEX scale_data_key ON scale_data(section, id1);
PREPARE stmt(int) AS SELECT count(*)
FROM scale_data
WHERE section = 1
AND id2 = $1; EXPLAIN EXECUTE stmt(1);
Aggregate (cost=529346.31..529346.32 rows=1 width=0)
Output: count(*)
-> Index Scan using scale_data_key on scale_data
(cost=0.00..529338.83 rows=2989 width=0)
Index Cond: (scale_data.section = 1::numeric)
Filter: (scale_data.id2 = ($1)::numeric)
CREATE INDEX scale_slow
ON scale_data (section, id1, id2);
Aggregate (cost=14215.98..14215.99 rows=1 width=0)
Output: count(*)
-> Index Scan using scale_slow on scale_data
(cost=0.00..14208.51 rows=2989 width=0)
Index Cond: (section = 1::numeric AND id2 = ($1)::numeric)
Slides & content available soon at juanignaciosl.github.io
Slides & content available soon at juanignaciosl.github.io
Use a spacebar or arrow keys to navigate