PostgreSQL: Difference between revisions
Line 456: | Line 456: | ||
http://tatiyants.com/pev/#/plans/new | http://tatiyants.com/pev/#/plans/new | ||
====Find and Delete Locks==== | ====Find and Delete Locks / Processes==== | ||
GRANT pg_signal_backend TO postgresadmin WITH ADMIN OPTION; | GRANT pg_signal_backend TO postgresadmin WITH ADMIN OPTION; | ||
SELECT pg_terminate_backend(pid) | SELECT pg_terminate_backend(pid) | ||
Line 463: | Line 463: | ||
Display PIDs and Queries running on a table, from [https://jaketrent.com/post/find-kill-locks-postgres/] | -- Display PIDs and Queries running on a table, from [https://jaketrent.com/post/find-kill-locks-postgres/] | ||
SELECT pid, state, usename, query, query_start | SELECT pid, state, usename, query, query_start | ||
FROM pg_stat_activity | FROM pg_stat_activity | ||
Line 472: | Line 472: | ||
); | ); | ||
-- show all processes on current DB | |||
SELECT pg_cancel_backend( | SELECT datname, pid, state, query, age(clock_timestamp(), query_start) AS age | ||
FROM pg_stat_activity | |||
WHERE 1=1 -- state <> 'idle' | |||
SELECT pg_terminate_backend( | AND query NOT LIKE '% FROM pg_stat_activity %' | ||
AND datname = current_database() | |||
ORDER BY age; | |||
-- show locks | |||
SELECT t.relname, * | |||
FROM pg_locks l | |||
JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r' | |||
WHERE t.relname != 'pg_class'; | |||
-- cancel process | |||
SELECT pg_cancel_backend(<my_pid>); | |||
COMMIT; | |||
-- force cancel process | |||
SELECT pg_terminate_backend(<my_pid>); | |||
COMMIT; | |||
-- force cancel all processes | |||
SELECT pg_terminate_backend(pid) | |||
FROM pg_stat_activity | |||
WHERE pid <> pg_backend_pid() AND datname = current_database(); | |||
COMMIT; | |||
===Backup and Restore=== | ===Backup and Restore=== |
Revision as of 12:13, 26 April 2024
date handling
SELECT date_trunc('hour', t.created) as hour FROM table t; SELECT date_trunc('month', t.created)::date AS month FROM table t; -- convert datetime to date SELECT * FROM table WHERE created > '2020-06-22 00:00:00+02' SELECT * FROM table WHERE created > current_timestamp - '3 hours'::interval SELECT * FROM table WHERE created > CURRENT_DATE - 91 -- days SELECT * FROM table WHERE created > now() at time zone 'utc' - interval '10 minute' # gen list of dates SELECT i::date AS day FROM generate_series('2020-01-01', CURRENT_DATE, '1 day'::interval) i # extract hour or day from date SELECT EXTRACT ('hour' from datecol) FROM myTable SELECT EXTRACT ('isodow' from datecol) FROM myTable # day of week filter SELECT date_trunc('day',created)::date, count (*) FROM myTable WHERE EXTRACT(DOW FROM created) = 0 GROUP BY 1 ORDER BY 1 DESC;
Group by date and add missing days with zero count
WITH all_days AS ( SELECT generate_series('01.01.2023', current_date, '1 day') :: date AS date ), count_per_day AS ( SELECT created :: date AS date, COUNT(*) AS count FROM my_table GROUP BY 1 ) SELECT all_days.date, COALESCE (count, 0) AS count FROM all_days LEFT OUTER JOIN count_per_day ON all_days.date = count_per_day.date ORDER BY 1 DESC;
date formatting
see [1]
SELECT to_char(last_login, 'DD.MM.YYYY') as last_login FROM mytable
convert/truncate date to month/week etc. (::date casts datetime to date)
SELECT date_trunc('week', import_date)::date, count (production_order) FROM cosp.c_upload_sap_order group by 1 order by 1 desc
cast types
select t.id, 'matched filter1'::character varying(16) from table t -- 1. convert timerange to seconds -- 2. perform average -- 3. cast to numeric to allow for rounding SELECT round(CAST(EXTRACT(EPOCH FROM avg (s.closure_date - s.date_of_start)) /3600 as numeric),1) AS avg_hours_total_avg from myTable s group by column1 -- convert date range to days SELECT EXTRACT (DAY FROM (current_date - date_due)) AS days FROM myTable
leading zeros for integer values
select TO_CHAR(plant_id, 'fm0000') from myTable
rounding of REALS
round(lower_specification_limit::numeric,6) as lower_specification_limit,
Select
EXCEPT (=MINUS in Oracle)
from [2]
SELECT column_list FROM A WHERE condition_a EXCEPT SELECT column_list FROM B WHERE condition_b;
RowSum / LAG
WITH daily_sum AS ( SELECT created, sum(count_finished) FROM c_report_timeseries WHERE department_id > 0 GROUP BY 1 ORDER BY 1 ) SELECT created, sum, sum - lag(sum) over (order by created) as increase FROM daily_sum;
Grouping and stats: min, max, avg, median
select d.name "Department", count(*) , min(i.modified-i.created), max(i.modified-i.created), avg(i.modified-i.created), PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY (i.modified-i.created) ) as median FROM table i
Substring
-- 087249731000011 -> 087249731-0000-11 SELECT id, plant_id, material_number , substring (material_number FROM 1 for 9) || '-' || substring (material_number FROM 10 for 4) || '-' || substring (material_number FROM 14 for 2) AS "MatNoNew" FROM d_template t WHERE 1=1 AND t.department_id = 50012487 AND material_number NOT LIKE '%-%' AND length(material_number) = 15 ;
sequence.nextval
SELECT nextval('mysequence');
Pivot / longtable to shorttable
WITH data AS ( SELECT date_trunc('week', created)::date AS "Date", myType, count (*) FROM myTable GROUP BY 1,2 ) SELECT "Date", max("count") FILTER (WHERE myType= 'D') AS "cntD", max("count") FILTER (WHERE myType= 'S') AS "cntS", max("count") FILTER (WHERE myType= 'M') AS "cntM" FROM data GROUP BY 1 ORDER BY 1 ASC
Weighted Average
WITH my_table AS ( SELECT id, property, val1, val2, val3, CASE WHEN created>current_date - 365 THEN 3 WHEN created>current_date - 730 THEN 2 ELSE 1 END AS weighting_factor ) SELECT sum(val1*weighting_factor)/sum(weighting_factor) AS "val1w", sum(val2*weighting_factor)/sum(weighting_factor) AS "val2w", sum(val3*weighting_factor)/sum(weighting_factor) AS "val3w" FROM my_table GROUP BY property
Functions in plpgsql
CREATE OR REPLACE FUNCTION drm.tmp_change_plant_id(id_old int, id_new int) RETURNS void AS $$ BEGIN UPDATE myTable SET col = 123 WHERE col = 456; END; $$ LANGUAGE plpgsql;
Insert Update
Insert with conflicting IDs
insert into machine_group(id, area, number) values (6, 6, 6), (7, 6, 7), (8, 6, 8), (9, 6, 9), (10, 6, 10) on conflict (id) do nothing;
Users
grant permission to user
-- as user myuser GRANT USAGE ON SCHEMA myschema TO myuser2; GRANT SELECT, INSERT ON TABLE mytable TO myuser2;
read only user
CREATE USER myUser PASSWORD 'myPassword'; GRANT USAGE ON SCHEMA mySchema TO myUser; GRANT SELECT ON ALL TABLES IN SCHEMA mySchema TO myUser; -- needed for views that are added later ALTER DEFAULT PRIVILEGES IN SCHEMA mySchema GRANT SELECT ON TABLES TO myUser;
Disable user
ALTER USER myuser WITH NOLOGIN
Change password
ALTER USER user_name WITH PASSWORD 'new_password';
Schema Ownership
GRANT myuser TO postgresadmin; ALTER SCHEMA myschema OWNER TO myuser; GRANT ALL ON SCHEMA myschema TO myuser; REVOKE myuser FROM postgresadmin;
Other
REGEXP and REGEXP_REPLACE
-- replace UPDATE mytable SET remark = REPLACE(remark, '&', '&') WHERE remark LIKE '%&%' -- remove _|_ -> linebreak \n=10 UPDATE mytable SET remark = REPLACE(remark, '_|_', chr(10) ) WHERE remark LIKE '%_|_%'
sprintf-alternative
goal
sprintf 'u%07d', 1
achieved via
SELECT replace( format('u%7s', 1), ' ', '0');
random item of list
SELECT (array['Yes', 'No', 'Maybe'])[floor(random() * 3 + 1)];
UPDATE via SELECT
update single column from another table
UPDATE q_user SET card_no = q_upload_user.card_no FROM q_upload_user WHERE q_user.id = q_upload_user.id
using WITH temp table
WITH map_name_id AS ( SELECT short_name, min(id) min_id FROM q_upload_organization GROUP BY short_name ) UPDATE q_upload_user2 SET department_id = map_name_id.min_id FROM map_name_id WHERE q_upload_user2.dpt_short_name_sap = map_name_id.short_name
sync table tab1 from tab2
UPDATE tab1 SET (group_id, error_de, error_en, error_hu) = ( SELECT group_id, error_de, error_en, error_hu FROM tab2 WHERE tab1.code = tab2.code )
pseudonymization
UPDATE user SET username = replace( format('u%7s', id), ' ', '0') -- u0000001 for id=1 ,first_name = (array['Max', 'Egon', 'Lisa']) [floor(random() * 3 + 1)] ,last_name = (array['Meier', 'Müller', 'Schmidt'])[floor(random() * 3 + 1)] ,email = 'no-mail@schaeffler.com' WHERE id = 1;
Count the occurrences of a substring in a string
SELECT (length(str) - length(replace(str, replacestr, '')) )::int / length(replacestr) FROM ( VALUES ('foobarbaz', 'ba') ) AS t(str, replacestr);
becomes
SELECT sub1.id, (length(sub1.str) - length(replace(sub1.str, sub1.replacestr, '')) )::int / length(sub1.replacestr) AS times FROM ( SELECT id, checklist AS str, '{"id":'::text AS replacestr FROM d_department ) sub1
which counts the number of (json) ids '{"id":' in a textfield
Get table column type / describe alternative
SELECT table_name, column_name, data_type, character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE 1 = 1 AND table_schema IN ('schema1','schema2') AND (table_name LIKE 'c\_%' OR table_name LIKE 'd\_%') AND column_name = 'order' ORDER BY table_name, column_name
Create Table Like
# without contents CREATE TABLE myNewTable AS SELECT * FROM myTable WHERE 1=2 # without contents but including index etc (but no permissions) CREATE TABLE myNewTable (like myTable including all);
Concatenate / string_agg columns of multiple rows
SELECT id, STRING_AGG(c.comment, ' ----- ' ORDER BY c.created ASC) AS comment FROM comment c GROUP BY id
Performance Tuning
use this to check the SQL speed:
EXPLAIN (analyze,buffers,timing) SELECT * FROM table WHERE ...
EXISTS
SELECT 1 WHERE EXISTS ( SELECT 1 FROM table WHERE field = :field ) --Is much faster than performing a COUNT(*) and later check if greater than 0
UNION vs UNION ALL
UNION does include a DISTINCT, while UNION ALL does not, so if you know the data is unique, use UNION ALL
SELECT WHERE IN () Statement
when passing a long list of values into a statement like
SELECT * FROM myTable where id in (1,2,3,4,5,6,7,8,...);
the statement becomes very slow, see [5]
Better use a WITH clause (=CTE = common table expressions):
WITH value_list AS ( VALUES (1),(2),... ) SELECT * FROM myTable WHERE id IN (SELECT * FROM value_list);
DELETE Statement
from [6]
Good: DELETE FROM foo WHERE id IN (select id from rows_to_delete); Bad: DELETE FROM foo WHERE id NOT IN (select id from rows_to_keep); Good Alternative for the bad rows_to_keep path: DELETE FROM foo WHERE id IN ( SELECT f.id FROM foo f LEFT JOIN rows_to_keep k ON f.id = k.id WHERE k.id IS NULL );
delete based on multiple criteria also in another table
DELETE FROM table1 t1 WHERE EXISTS ( SELECT 1 FROM table2 t2 WHERE 1 = 1 AND t2.date < CURRENT_DATE - 30 AND t1.col1 = t2.col1 AND t1.col2 = t2.col2 )
Find Missing Index/Indices
from https://www.cybertec-postgresql.com/en/index-your-foreign-key/
SELECT c.conrelid::regclass AS "table", /* list of key column names in order */ string_agg(a.attname, ',' ORDER BY x.n) AS columns, pg_catalog.pg_size_pretty( pg_catalog.pg_relation_size(c.conrelid) ) AS size, c.conname AS constraint, c.confrelid::regclass AS referenced_table FROM pg_catalog.pg_constraint c /* enumerated key column numbers per foreign key */ CROSS JOIN LATERAL unnest(c.conkey) WITH ORDINALITY AS x(attnum, n) /* name for each key column */ JOIN pg_catalog.pg_attribute a ON a.attnum = x.attnum AND a.attrelid = c.conrelid WHERE NOT EXISTS /* is there a matching index for the constraint? */ (SELECT 1 FROM pg_catalog.pg_index i WHERE i.indrelid = c.conrelid /* it must not be a partial index */ AND i.indpred IS NULL /* the first index columns must be the same as the key columns, but order doesn't matter */ AND (i.indkey::smallint[])[0:cardinality(c.conkey)-1] OPERATOR(pg_catalog.@>) c.conkey) AND c.contype = 'f' GROUP BY c.conrelid, c.conname, c.confrelid ORDER BY pg_catalog.pg_relation_size(c.conrelid) DESC;
Admin
SELECT version();
PostgreSQL Monitoring Cheatsheet
see [7]
Vacuum / Autovacuum
Manual
VACUUM VERBOSE ANALYZE myTable;
or with exclusive lock:
VACUUM FULL VERBOSE ANALYZE myTable;
AutoVacuum
SELECT name, setting FROM pg_settings WHERE name='autovacuum'; -- should be on SELECT * from pg_settings where category like 'Autovacuum'; --overview by SELECT schemaname, relname, last_autoanalyze, last_analyze, last_autovacuum, last_vacuum, n_live_tup, n_dead_tup,* FROM "pg_catalog"."pg_stat_all_tables" WHERE schemaname = 'mySchema' ORDER BY last_autoanalyze DESC
Performance Analysis
List of currently running Queries
SELECT pid, datname, usename, state, query, query_start, now()-query_start AS runtime, client_addr, * FROM pg_stat_activity WHERE 1=1 ORDER BY query_start ASC ;
pg_stat_statements module
pg_stat_statements module is very nice, as it displays which queries run how often and how long, see for example [8]
Installation
- postgresql.conf
in : section # - Kernel Resource Usage - add: shared_preload_libraries = 'pg_stat_statements'
- restart PostgreSQL
CREATE EXTENSION pg_stat_statements (in each DB)
- check in available extensions via
SELECT * FROM pg_available_extensions WHERE name = 'pg_stat_statements'
finally run (recent postgres version)
SELECT round((total_exec_time / 1000 / 3600)::numeric,2) as total_hours, round((total_exec_time / 1000 / calls)::numeric,3) as avg_sec, calls num_calls, query FROM pg_stat_statements WHERE 1=1 AND query != '<insufficient privilege>' -- only by current user -- AND lower(query) LIKE '%select *%join%' ORDER BY 2 DESC LIMIT 100;
finally run (old postgres version)
SELECT round((total_time / 1000 / 3600)::numeric,2) as total_hours, round((total_time / 1000 / calls)::numeric,3) as avg_sec, calls num_calls, query FROM pg_stat_statements WHERE 1=1 AND query != '<insufficient privilege>' -- only by current user -- AND lower(query) LIKE '%select *%join%' ORDER BY 2 DESC LIMIT 100;
- Grant access to non-admin users:
GRANT USAGE ON SCHEMA public TO myUser; GRANT SELECT ON pg_stat_statements TO myUser;
- Reset stats (as admin user)
SELECT pg_stat_statements_reset()
Check/analyze why a certain statement runs slow
from [9]
EXPLAIN (analyze,buffers,timing) DELETE FROM myTable WHERE id IN ( SELECT id FROM myTableToDelete ) AND id > 2150 AND id < 2200 ;
EXPLAIN ANALYZE SELECT id FROM myTableToDelete
analyze via
psql -qAt -f explain.sql > analyze.json http://tatiyants.com/pev/#/plans/new
Find and Delete Locks / Processes
GRANT pg_signal_backend TO postgresadmin WITH ADMIN OPTION; SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid <> pg_backend_pid();
-- Display PIDs and Queries running on a table, from [10]
SELECT pid, state, usename, query, query_start FROM pg_stat_activity WHERE pid IN ( SELECT pid FROM pg_locks l JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r' WHERE t.relname = 'myTable' );
-- show all processes on current DB SELECT datname, pid, state, query, age(clock_timestamp(), query_start) AS age FROM pg_stat_activity WHERE 1=1 -- state <> 'idle' AND query NOT LIKE '% FROM pg_stat_activity %' AND datname = current_database() ORDER BY age; -- show locks SELECT t.relname, * FROM pg_locks l JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r' WHERE t.relname != 'pg_class'; -- cancel process SELECT pg_cancel_backend(<my_pid>); COMMIT; -- force cancel process SELECT pg_terminate_backend(<my_pid>); COMMIT; -- force cancel all processes SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid <> pg_backend_pid() AND datname = current_database(); COMMIT;
Backup and Restore
Backup
SET PGPASSWORD=myPassword pg_dump.exe --host=localhost --port=5432 --username=myUser --dbname=myDB --schema=mySchema --format=c --blobs --verbose --file myDB.dmp
Restore (as admin user postgres)
ALTER SCHEMA "myDB" RENAME TO "myDB.old"; SET PGPASSWORD=myPassword pg_restore --host=localhost --port=5432 --username=postgres --create --dbname myDB --verbose --exit-on-error myDB.dmp DROP SCHEMA "myDB.old" CASCADE;
Disconnect all sessions
SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = 'myDB' AND pid <> pg_backend_pid();
Drop and recreate DB via psql
SET PGPASSWORD=myPassword psql --port=5432 --username=myUser DROP database IF EXISTS myDB; CREATE database myDB; \c myDB; DROP SCHEMA IF EXISTS public; CREATE SCHEMA public; \q
Export and import single table
SET PGPASSWORD=myPassword # export single report table # use of pg_dump / restore is problematic, since it requires to have the same schema at the target DB, hence using COPY instead psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB --schema=mySchema ^ --command="COPY myTable TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv # or psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB --schema=mySchema ^ --command="COPY (SELECT id,name FROM myTable) TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv # import using COPY psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^ -v ON_ERROR_STOP=1 --command="TRUNCATE TABLE myTable; COPY myTable FROM STDIN DELIMITER '|' CSV HEADER;" < myTable.csv