PostgreSQL
Connect via a single-line connection string
postgresql://user:password@host:5432/database
SELECT
Date handling
Filter on date
SELECT * FROM table WHERE created > '2020-06-22 00:00:00+02';
SELECT * FROM table WHERE created > current_timestamp - '3 hours'::interval;
SELECT * FROM table WHERE created > current_date - 91; -- days
SELECT * FROM table WHERE created > now() at time zone 'utc' - interval '10 minute';
-- day of week filter (DOW: Sunday=0)
SELECT * FROM table WHERE EXTRACT(DOW FROM created) = 0;
date_trunc: truncate a timestamp to hour/day/week/month
SELECT date_trunc('hour', t.created) as hour FROM table t;
SELECT date_trunc('day', t.created)::date AS created_date FROM table t;
SELECT date_trunc('week', t.created)::date AS week FROM table t;
SELECT date_trunc('month', t.created)::date AS month FROM table t;
extract
SELECT EXTRACT(hour FROM datecol) FROM myTable;
SELECT EXTRACT(isodow FROM datecol) FROM myTable; -- ISO day of week: Monday=1 .. Sunday=7
SELECT EXTRACT(dow FROM datecol) FROM myTable; -- day of week: Sunday=0 .. Saturday=6
SELECT EXTRACT(epoch FROM closed - created) / 3600 AS hours FROM myTable; -- difference in hours
other
-- generate list of dates
SELECT i::date AS day FROM generate_series('2020-01-01', CURRENT_DATE, '1 day'::interval) i
Group by date and add missing days with zero count
WITH all_days AS (
SELECT generate_series('2023-01-01', current_date, '1 day') :: date AS date -- ISO date format avoids datestyle ambiguity
), count_per_day AS (
SELECT
created :: date AS date,
COUNT(*) AS count
FROM my_table
GROUP BY 1
)
SELECT
all_days.date,
COALESCE (count, 0) AS count
FROM all_days
LEFT OUTER JOIN count_per_day ON all_days.date = count_per_day.date
ORDER BY
1 DESC;
cast types
SELECT t.id, 'matched filter1'::character varying(16) FROM table t;
-- 1. convert timerange to seconds
-- 2. perform average
-- 3. cast to numeric to allow for rounding
SELECT round(CAST(EXTRACT(EPOCH FROM avg(s.closure_date - s.date_of_start)) / 3600 AS numeric), 1) AS avg_hours_total_avg
FROM myTable s
GROUP BY column1;
-- convert date range to days
SELECT EXTRACT(DAY FROM (current_date - date_due)) AS days FROM myTable;
leading zeros for integer values
select TO_CHAR(plant_id, 'fm0000') from myTable
rounding of REALS
round(lower_specification_limit::numeric,6) as lower_specification_limit,
EXCEPT (=MINUS in Oracle)
from [1]
SELECT column_list FROM A WHERE condition_a EXCEPT SELECT column_list FROM B WHERE condition_b;
RowSum / LAG
WITH daily_sum AS (
  SELECT created, sum(count_finished)
  FROM c_report_timeseries
  WHERE department_id > 0
  GROUP BY 1
  ORDER BY 1
)
SELECT created,
       sum,
       sum - lag(sum) OVER (ORDER BY created) AS increase
FROM daily_sum;
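For the "RowSum" part of the heading, a running total via a window SUM (a minimal sketch over the same data):
WITH daily_sum AS (
  SELECT created, sum(count_finished) AS daily_total
  FROM c_report_timeseries
  WHERE department_id > 0
  GROUP BY 1
)
SELECT created,
       daily_total,
       sum(daily_total) OVER (ORDER BY created) AS running_total -- cumulative sum up to this row
FROM daily_sum;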
Grouping and stats: min, max, avg, median
SELECT d.name AS "Department",
       count(*),
       min(i.modified - i.created),
       max(i.modified - i.created),
       avg(i.modified - i.created),
       PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (i.modified - i.created)) AS median,
       EXTRACT(EPOCH FROM PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (i.modified - i.created)))::int AS median_seconds
FROM table i
JOIN department d ON d.id = i.department_id -- join assumed: the original snippet used d.name without defining d
GROUP BY d.name;
Substring
-- 087249731000011 -> 087249731-0000-11
SELECT id, plant_id, material_number,
       substring(material_number FROM 1 FOR 9) || '-' ||
       substring(material_number FROM 10 FOR 4) || '-' ||
       substring(material_number FROM 14 FOR 2) AS "MatNoNew"
FROM d_template t
WHERE 1=1
  AND t.department_id = 50012487
  AND material_number NOT LIKE '%-%'
  AND length(material_number) = 15;
sequence.nextval
SELECT nextval('mysequence');
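Companion commands for working with sequences (mysequence is a placeholder name):
CREATE SEQUENCE mysequence;
SELECT currval('mysequence'); -- last value handed out by nextval in this session
SELECT setval('mysequence', 1000); -- reposition the sequence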
Pivot / long table to wide table
WITH data AS (
SELECT
date_trunc('week', created)::date AS "Date",
myType,
count (*)
FROM myTable
GROUP BY 1,2
)
SELECT "Date",
max("count") FILTER (WHERE myType= 'D') AS "cntD",
max("count") FILTER (WHERE myType= 'S') AS "cntS
max("count") FILTER (WHERE myType= 'M') AS "cntM"
FROM data
GROUP BY 1
ORDER BY 1 ASC
Weighted Average
WITH my_table AS (
  SELECT id, property, val1, val2, val3,
         CASE
           WHEN created > current_date - 365 THEN 3
           WHEN created > current_date - 730 THEN 2
           ELSE 1
         END AS weighting_factor
  FROM source_table -- the FROM clause was missing in the original snippet; table name is a placeholder
)
SELECT property,
       sum(val1 * weighting_factor) / sum(weighting_factor) AS "val1w",
       sum(val2 * weighting_factor) / sum(weighting_factor) AS "val2w",
       sum(val3 * weighting_factor) / sum(weighting_factor) AS "val3w"
FROM my_table
GROUP BY property;
INSERT
Insert, or update on conflict (upsert)
INSERT INTO login_cnt (user_id)
VALUES (%s)
ON CONFLICT (user_id, "date")
DO UPDATE SET count = login_cnt.count + 1;
-- login counter table
CREATE TABLE
login_cnt (
user_id int2 NOT NULL,
"date" date DEFAULT current_date NOT NULL,
count int2 DEFAULT 1 NOT NULL,
CONSTRAINT pkey_login_cnt PRIMARY KEY (user_id, date)
);
Insert with conflicting IDs
insert into machine_group(id, area, number)
values (6, 6, 6),
(7, 6, 7),
(8, 6, 8),
(9, 6, 9),
(10, 6, 10)
on conflict (id) do nothing;
UPDATE
REGEXP and REGEXP_REPLACE
-- replace HTML-escaped ampersands (the entity was apparently rendered away in the original note)
UPDATE mytable SET remark = REPLACE(remark, '&amp;', '&') WHERE remark LIKE '%&amp;%';
-- replace the marker _|_ with a linebreak (\n = chr(10)); \_ escapes the LIKE wildcard _
UPDATE mytable SET remark = REPLACE(remark, '_|_', chr(10)) WHERE remark LIKE '%\_|\_%';
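The REGEXP_REPLACE part of the heading is not shown above; a minimal sketch (table/column names are placeholders):
-- strip all non-digit characters ('g' = replace all matches, not just the first)
UPDATE mytable SET remark = REGEXP_REPLACE(remark, '[^0-9]', '', 'g') WHERE remark ~ '[^0-9]';
-- collapse runs of whitespace into a single space
UPDATE mytable SET remark = REGEXP_REPLACE(remark, '\s+', ' ', 'g');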
UPDATE via SELECT
update single column from another table
UPDATE q_user SET card_no = q_upload_user.card_no FROM q_upload_user WHERE q_user.id = q_upload_user.id
using WITH temp table
WITH map_name_id AS (
  SELECT short_name, min(id) AS min_id
  FROM q_upload_organization
  GROUP BY short_name
)
UPDATE q_upload_user2
SET department_id = map_name_id.min_id
FROM map_name_id
WHERE q_upload_user2.dpt_short_name_sap = map_name_id.short_name;
sync table tab1 from tab2
UPDATE tab1
SET (group_id, error_de, error_en, error_hu) = (
  SELECT group_id, error_de, error_en, error_hu
  FROM tab2
  WHERE tab1.code = tab2.code
);
pseudonymization
UPDATE "user" -- user is a reserved word and must be quoted
SET
username = replace( format('u%7s', id), ' ', '0') -- u0000001 for id=1
,first_name = (array['Max', 'Egon', 'Lisa']) [floor(random() * 3 + 1)]
,last_name = (array['Meier', 'Müller', 'Schmidt'])[floor(random() * 3 + 1)]
,email = 'no-mail@schaeffler.com'
WHERE id = 1;
DELETE
Performance for DELETE Statement
from [2]
Good: DELETE FROM foo WHERE id IN (SELECT id FROM rows_to_delete);
Bad: DELETE FROM foo WHERE id NOT IN (SELECT id FROM rows_to_keep); -- NOT IN is planned poorly and deletes nothing if the subquery yields a NULL
Good alternative for the rows_to_keep case (anti-join):
DELETE FROM foo
WHERE id IN (
SELECT f.id FROM foo f
LEFT JOIN rows_to_keep k ON f.id = k.id
WHERE k.id IS NULL
);
delete based on multiple criteria, matched against another table
DELETE FROM
table1 t1
WHERE
EXISTS (
SELECT
1
FROM
table2 t2
WHERE
1 = 1
AND t2.date < CURRENT_DATE - 30
AND t1.col1 = t2.col1
AND t1.col2 = t2.col2 )
CREATE TABLE
Create Table with auto-increment id
CREATE TABLE test_auto_id (
id serial PRIMARY KEY,
value TEXT NOT NULL
);
INSERT INTO test_auto_id (value) VALUES ('asdf');
SELECT * FROM test_auto_id ;
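On PostgreSQL 10+, an identity column is the standards-conforming alternative to serial (a sketch; table name is a placeholder):
CREATE TABLE test_auto_id2 (
  id int GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  value TEXT NOT NULL
);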
Create Table Like
-- without contents
CREATE TABLE myNewTable AS SELECT * FROM myTable WHERE 1=2;
-- without contents but including indexes etc. (but no permissions)
CREATE TABLE myNewTable (LIKE myTable INCLUDING ALL);
Create unlogged cache/caching table
inspired by "You Don't Need a Dedicated Cache Service - PostgreSQL as a Cache"
CREATE UNLOGGED TABLE my_cache (
  url text UNIQUE NOT NULL,
  value jsonb,
  delete_at timestamp WITHOUT TIME ZONE DEFAULT now() + '24 hours'::interval
);
-- note: the UNIQUE constraint on url already creates an index, so this one is redundant
CREATE INDEX idx_cache_key ON my_cache (url);
INSERT INTO my_cache (url, value)
VALUES ('myURL2.com', '{"key":"value"}');
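Typical usage against the table above (a sketch; the 24-hour refresh mirrors the column default):
-- upsert an entry, refreshing value and expiry
INSERT INTO my_cache (url, value)
VALUES ('myURL2.com', '{"key":"other"}')
ON CONFLICT (url) DO UPDATE
  SET value = EXCLUDED.value,
      delete_at = now() + '24 hours'::interval;
-- read a non-expired entry
SELECT value FROM my_cache WHERE url = 'myURL2.com' AND delete_at > now();
-- purge expired entries (e.g. from a cron job)
DELETE FROM my_cache WHERE delete_at <= now();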
FUNCTION in plpgsql
CREATE OR REPLACE FUNCTION drm.tmp_change_plant_id(id_old int, id_new int)
RETURNS void AS $$
BEGIN
  -- the original snippet used literal placeholders; using the parameters here
  UPDATE myTable SET plant_id = id_new WHERE plant_id = id_old;
END;
$$ LANGUAGE plpgsql;
Users
grant permission to user
-- as user myuser
GRANT USAGE ON SCHEMA myschema TO myuser2;
GRANT SELECT, INSERT ON TABLE mytable TO myuser2;
read-only user
CREATE USER myUser PASSWORD 'myPassword';
GRANT USAGE ON SCHEMA mySchema TO myUser;
GRANT SELECT ON ALL TABLES IN SCHEMA mySchema TO myUser;
-- needed for views that are added later
ALTER DEFAULT PRIVILEGES IN SCHEMA mySchema GRANT SELECT ON TABLES TO myUser;
-- default to search in another schema
ALTER ROLE myUser SET search_path TO mySchema, public;
-- set default for transactions to read-only
ALTER USER myUser SET default_transaction_read_only = on;
-- check via
SELECT * FROM pg_db_role_setting; -- -> unfold the results
Disable user
ALTER USER myuser WITH NOLOGIN
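Re-enable via:
ALTER USER myuser WITH LOGIN;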
Change password
ALTER USER user_name WITH PASSWORD 'new_password';
Schema Ownership
GRANT myuser TO postgresadmin;
ALTER SCHEMA myschema OWNER TO myuser;
GRANT ALL ON SCHEMA myschema TO myuser;
REVOKE myuser FROM postgresadmin;
Other
sprintf-alternative
goal
sprintf 'u%07d', 1
achieved via
SELECT replace( format('u%7s', 1), ' ', '0');
random item of list
SELECT (array['Yes', 'No', 'Maybe'])[floor(random() * 3 + 1)];
Count the occurrences of a substring in a string
SELECT
  (length(str) - length(replace(str, replacestr, '')))::int
  / length(replacestr)
FROM ( VALUES
  ('foobarbaz', 'ba')
) AS t(str, replacestr);
becomes
SELECT
sub1.id,
(length(sub1.str) - length(replace(sub1.str, sub1.replacestr, '')))::int / length(sub1.replacestr) AS times
FROM
(
SELECT id, checklist AS str, '{"id":'::text AS replacestr FROM d_department
) sub1
which counts the number of (json) ids '{"id":' in a text field
Get table column type / describe alternative
SELECT
table_name,
column_name,
data_type,
character_maximum_length
FROM
INFORMATION_SCHEMA.COLUMNS
WHERE
1 = 1
AND table_schema IN ('schema1','schema2')
AND (table_name LIKE 'c\_%' OR table_name LIKE 'd\_%')
AND column_name = 'order'
ORDER BY
table_name,
column_name
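In psql, the \d meta-commands show the same information interactively:
\d myTable
\d+ myTable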
Concatenate / string_agg columns of multiple rows
SELECT id,
       STRING_AGG(c.comment, ' ----- ' ORDER BY c.created ASC) AS comment
FROM comment c
GROUP BY id;
Performance Tuning
use this to check the SQL speed:
EXPLAIN (analyze,buffers,timing) SELECT * FROM table WHERE ...
EXISTS
SELECT 1 WHERE EXISTS (
  SELECT 1 FROM table WHERE field = :field
);
-- much faster than performing a COUNT(*) and then checking whether it is greater than 0
UNION vs UNION ALL
UNION implies a DISTINCT (it removes duplicate rows across the combined result), while UNION ALL does not; if you know the data is unique, use UNION ALL and save the deduplication step.
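A quick illustration (constants only, runnable anywhere):
SELECT 1 AS x UNION SELECT 1;     -- one row: the duplicate is removed
SELECT 1 AS x UNION ALL SELECT 1; -- two rows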
SELECT WHERE IN () Statement
when passing a long list of values into a statement like
SELECT * FROM myTable where id in (1,2,3,4,5,6,7,8,...);
the statement becomes very slow, see [5]
Better use a WITH clause (= CTE = common table expression):
WITH value_list AS (
  VALUES (1),(2),...
)
SELECT * FROM myTable WHERE id IN (SELECT * FROM value_list);
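Another option (a sketch): pass the whole list as a single array parameter.
SELECT * FROM myTable WHERE id = ANY(ARRAY[1,2,3,4,5,6,7,8]);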
Find Missing Index/Indices
Read https://use-the-index-luke.com/
from https://www.cybertec-postgresql.com/en/index-your-foreign-key/
SELECT c.conrelid::regclass AS "table",
/* list of key column names in order */
string_agg(a.attname, ',' ORDER BY x.n) AS columns,
pg_catalog.pg_size_pretty(
pg_catalog.pg_relation_size(c.conrelid)
) AS size,
c.conname AS constraint,
c.confrelid::regclass AS referenced_table
FROM pg_catalog.pg_constraint c
/* enumerated key column numbers per foreign key */
CROSS JOIN LATERAL
unnest(c.conkey) WITH ORDINALITY AS x(attnum, n)
/* name for each key column */
JOIN pg_catalog.pg_attribute a
ON a.attnum = x.attnum
AND a.attrelid = c.conrelid
WHERE NOT EXISTS
/* is there a matching index for the constraint? */
(SELECT 1 FROM pg_catalog.pg_index i
WHERE i.indrelid = c.conrelid
/* it must not be a partial index */
AND i.indpred IS NULL
/* the first index columns must be the same as the
key columns, but order doesn't matter */
AND (i.indkey::smallint[])[0:cardinality(c.conkey)-1]
OPERATOR(pg_catalog.@>) c.conkey)
AND c.contype = 'f'
GROUP BY c.conrelid, c.conname, c.confrelid
ORDER BY pg_catalog.pg_relation_size(c.conrelid) DESC;
Admin
SELECT version();
PostgreSQL Monitoring Cheatsheet
see [6]
Disconnect all sessions
SELECT pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE pg_stat_activity.datname = 'myDB'
  AND pid <> pg_backend_pid();
Drop and recreate DB via psql
SET PGPASSWORD=myPassword
psql --port=5432 --username=myUser
DROP DATABASE IF EXISTS myDB;
CREATE DATABASE myDB;
\c myDB
DROP SCHEMA IF EXISTS public;
CREATE SCHEMA public;
\q
Table size including Indices
SELECT
table_name,
pg_size_pretty(table_size) AS table_size,
pg_size_pretty(indexes_size) AS indexes_size,
pg_size_pretty(total_size) AS total_size
FROM (
SELECT
table_name,
pg_table_size(table_name) AS table_size,
pg_indexes_size(table_name) AS indexes_size,
pg_total_relation_size(table_name) AS total_size
FROM (
SELECT ('"' || table_schema || '"."' || table_name || '"') AS table_name
FROM information_schema.tables
) AS all_tables
ORDER BY total_size DESC
) AS pretty_sizes;
Find table column candidates for type enum (fewer than 10 distinct values)
DO $$
DECLARE
r RECORD;
query TEXT;
all_queries TEXT := '';
my_table_name TEXT := 'd_template';
BEGIN
FOR r IN
SELECT column_name, data_type
FROM information_schema.COLUMNS
WHERE table_name = my_table_name
AND data_type NOT IN ('bigint', 'integer', 'timestamp without time zone', 'boolean', 'USER-DEFINED')
ORDER BY column_name
LOOP
query := 'SELECT ''' || r.column_name || ''' AS column_name, count(*) FROM (SELECT DISTINCT ' || r.column_name || ' FROM ' || my_table_name || ' LIMIT 10) s1 HAVING count(*) < 10'; -- doubled single quotes were lost in the original rendering
--RAISE NOTICE '%', query;
IF all_queries <> '' THEN
all_queries := all_queries || ' UNION ALL ';
END IF;
all_queries := all_queries || query;
END LOOP;
RAISE NOTICE '%', all_queries;
--EXECUTE all_queries;
END $$;
Vacuum / Autovacuum
Manual
VACUUM VERBOSE ANALYZE myTable;
or with exclusive lock:
VACUUM FULL VERBOSE ANALYZE myTable;
AutoVacuum
SELECT name, setting FROM pg_settings WHERE name = 'autovacuum'; -- should be on
SELECT * FROM pg_settings WHERE category LIKE 'Autovacuum%'; -- overview
SELECT schemaname, relname, last_autoanalyze, last_analyze, last_autovacuum, last_vacuum,
       n_live_tup, n_dead_tup, *
FROM "pg_catalog"."pg_stat_all_tables"
WHERE schemaname = 'mySchema'
ORDER BY last_autoanalyze DESC;
Performance Analysis
List of currently running Queries
SELECT pid, datname, usename, state, query, query_start,
       now() - query_start AS runtime,
       client_addr, application_name, *
FROM pg_stat_activity
WHERE 1=1
ORDER BY query_start ASC;
pg_stat_statements module
The pg_stat_statements module is very useful, as it shows which queries run how often and for how long; see for example [7].
Installation
- postgresql.conf: in the section "# - Kernel Resource Usage -", add: shared_preload_libraries = 'pg_stat_statements'
- server parameter 'pg_stat_statements.track' must be set to 'top' or 'all'
- restart PostgreSQL
- run CREATE EXTENSION pg_stat_statements (in each DB)
- check in available extensions via
SELECT * FROM pg_available_extensions WHERE name = 'pg_stat_statements'
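To verify the setup afterwards (standard catalog views):
SHOW shared_preload_libraries; -- must list pg_stat_statements
SELECT extname, extversion FROM pg_extension WHERE extname = 'pg_stat_statements';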
finally run (recent postgres version)
SELECT round((total_exec_time / 1000 / 3600)::numeric, 2) AS total_hours,
       round((total_exec_time / 1000 / calls)::numeric, 3) AS avg_sec,
       calls AS num_calls,
       query
FROM pg_stat_statements
WHERE 1=1
  AND query != '<insufficient privilege>' -- only queries visible to the current user
  -- AND lower(query) LIKE '%select *%join%'
ORDER BY 2 DESC
LIMIT 100;
finally run (old postgres version)
SELECT round((total_time / 1000 / 3600)::numeric, 2) AS total_hours,
       round((total_time / 1000 / calls)::numeric, 3) AS avg_sec,
       calls AS num_calls,
       query
FROM pg_stat_statements
WHERE 1=1
  AND query != '<insufficient privilege>' -- only queries visible to the current user
  -- AND lower(query) LIKE '%select *%join%'
ORDER BY 2 DESC
LIMIT 100;
- Grant access to non-admin users:
GRANT USAGE ON SCHEMA public TO myUser;
GRANT SELECT ON pg_stat_statements TO myUser;
- Reset stats (as admin user)
SELECT pg_stat_statements_reset()
Check/analyze why a certain statement runs slow
from [8]
EXPLAIN (analyze, buffers, timing)
DELETE FROM myTable
WHERE id IN ( SELECT id FROM myTableToDelete )
  AND id > 2150
  AND id < 2200;
EXPLAIN ANALYZE SELECT id FROM myTableToDelete
analyze via
psql -qAt -f explain.sql > analyze.json
then paste the JSON into http://tatiyants.com/pev/#/plans/new
Find and Delete Locks / Processes
GRANT pg_signal_backend TO postgresadmin WITH ADMIN OPTION;
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE pid <> pg_backend_pid();
-- Display PIDs and Queries running on a table, from [9]
SELECT pid, state, usename, query, query_start
FROM pg_stat_activity
WHERE pid IN (
SELECT pid FROM pg_locks l
JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r'
WHERE t.relname = 'myTable'
);
-- show all processes on current DB
SELECT datname, pid, state, query, age(clock_timestamp(), query_start) AS age
FROM pg_stat_activity
WHERE 1=1
AND state <> 'idle'
AND query NOT LIKE '% FROM pg_stat_activity %'
AND datname = current_database()
ORDER BY age;
-- show locks
SELECT t.relname, *
FROM pg_locks l
JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r'
WHERE t.relname != 'pg_class';
-- cancel process
SELECT pg_cancel_backend(<my_pid>);
COMMIT;
-- force cancel process
SELECT pg_terminate_backend(<my_pid>);
COMMIT;
-- force cancel all processes
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE pid <> pg_backend_pid() AND datname = current_database();
COMMIT;
Backup and Restore
Backup
SET PGPASSWORD=myPassword
pg_dump.exe --host=localhost --port=5432 --username=myUser --dbname=myDB --schema=mySchema --format=c --blobs --verbose --file=myDB.dmp
Restore (as admin user postgres)
ALTER SCHEMA "myDB" RENAME TO "myDB.old"; SET PGPASSWORD=myPassword pg_restore --host=localhost --port=5432 --username=postgres --create --dbname myDB --verbose --exit-on-error myDB.dmp DROP SCHEMA "myDB.old" CASCADE;
Export and import a single table
SET PGPASSWORD=myPassword
REM export a single report table
REM pg_dump/pg_restore is problematic here since it requires the same schema on the target DB, hence COPY is used instead
REM note: psql has no --schema option, so the table name is schema-qualified instead
psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
  --command="COPY mySchema.myTable TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv
REM or
psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
  --command="COPY (SELECT id,name FROM mySchema.myTable) TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv
REM import using COPY
psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
  -v ON_ERROR_STOP=1 --command="TRUNCATE TABLE mySchema.myTable; COPY mySchema.myTable FROM STDIN DELIMITER '|' CSV HEADER;" < myTable.csv