PostgreSQL

From Torben's Wiki
Jump to navigationJump to search

date handling

SELECT date_trunc('hour', t.created) as hour FROM table t;
SELECT date_trunc('month', t.created)::date AS month FROM  table t; -- covert datetime to date
SELECT * FROM table WHERE created > '2020-06-22 00:00:00+02'
SELECT * FROM table WHERE created > current_timestamp - '3 hours'::interval
SELECT * FROM table WHERE created > CURRENT_DATE - 91  -- days
SELECT * FROM table WHERE created > now() at time zone 'utc' - interval '10 minute'
# gen list of dates
SELECT i::date AS day FROM generate_series('2020-01-01', CURRENT_DATE, '1 day'::interval) i

date formatting

see [1]

SELECT to_char(last_login, 'DD.MM.YYYY') as last_login FROM mytable

convert/truncate date to month/week etc. (::data casts datetime to date)

SELECT date_trunc('week', import_date)::date, count (production_order)
  FROM cosp.c_upload_sap_order
  group by 1
  order by 1 desc

cast types

select t.id, 'matched filter1'::character varying(16) from table t
-- 1. convert timerange to seconds
-- 2. perform average
-- 3. cast to nummeric to allow for rounding
SELECT round(CAST(EXTRACT(EPOCH FROM avg (s.closure_date - s.date_of_start)) /3600 as numeric),1) AS avg_hours_total_avg from myTable s group by cloumn1

rounding of REALS

round(lower_specification_limit::numeric,6) as lower_specification_limit,

Select

EXCEPT (=MINUS in Oracle)

from [2]

SELECT column_list
FROM A
WHERE condition_a
EXCEPT 
SELECT column_list
FROM B
WHERE condition_b;

RowSum / LAG

WITH daily_sum AS (
SELECT created, sum(count_finished)
FROM c_report_timeseries
WHERE department_id > 0 
GROUP BY 1
ORDER BY 1
)
SELECT created, sum, sum - lag(sum) over (order by created) as increase
FROM daily_sum;

Grouping and stats: min, max, avg, median

select d.name "Department",
count(*) ,
min(i.modified-i.created),
max(i.modified-i.created),
avg(i.modified-i.created),
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY (i.modified-i.created) ) as median 
FROM table i

Users

grant permission to user

-- as user myuser
GRANT USAGE ON SCHEMA myschema TO myuser2;
GRANT SELECT, INSERT ON TABLE mytable TO myuser2;

read only user

CREATE USER myUser PASSWORD 'myPassword';
GRANT USAGE ON SCHEMA mySchema TO myUser
GRANT SELECT ON ALL TABLES IN SCHEMA mySchema TO myUser

Disable user

ALTER USER myuser WITH NOLOGIN

Other

REGEXP and REGEXP_REPLACE

-- replace 
UPDATE mytable
SET remark = REPLACE(remark, '&', '&')
WHERE remark LIKE '%&%'

-- remove _|_ -> linebreak \n=10
UPDATE mytable
SET remark = REPLACE(remark, '_|_', chr(10) )
WHERE remark LIKE '%_|_%'

sprintf-alternative

goal

sprintf 'u%07d', 1

achieved via

SELECT replace( format('u%7s', 1), ' ', '0');

random item of list

SELECT (array['Yes', 'No', 'Maybe'])[floor(random() * 3 + 1)];

UPDATE via SELECT

sync table tab1 from tab2

UPDATE tab1
SET (group_id, error_de, error_en, error_hu) = (
 SELECT group_id, error_de, error_en, error_hu FROM tab2 WHERE tab1.code = tab2.code
)

update single column from another table

UPDATE q_user
SET    card_no = q_upload_user.card_no
FROM   q_upload_user  
WHERE  q_user.id = q_upload_user.id

using WITH temp table

WITH map_name_id AS 
(
SELECT short_name, min(id) min_id
 FROM   q_upload_organization
 GROUP BY short_name 
)
UPDATE q_upload_user2
SET    department_id = map_name_id.min_id
FROM   map_name_id  
WHERE  q_upload_user2.dpt_short_name_sap = map_name_id.short_name

pseudonymization

UPDATE user
SET
username = replace( format('u%7s', id), ' ', '0') -- u0000001 for id=1
,first_name = (array['Max', 'Egon', 'Lisa']) [floor(random() * 3 + 1)]
,last_name = (array['Meier', 'Müller', 'Schmidt')[floor(random() * 3 + 1)]
,email = 'no-mail@schaeffler.com'
WHERE id = 1;

Count the occurrences of a substring in a string

solution based on [3] and [4]

SELECT
 (length(str) - length(replace(str, replacestr, )) )::int
 / length(replacestr)
FROM ( VALUES
 ('foobarbaz', 'ba')
) AS t(str, replacestr);

becomes

SELECT
  sub1.id, 
  (length(sub1.str) - length(replace(sub1.str, sub1.replacestr, )) )::int  / length(sub1.replacestr) AS times
FROM
(
SELECT id, checklist AS str, '{"id":'::text AS replacestr FROM d_department
) sub1

which counts the number of (json) ids '{"id":' in a textfield

Get table column type / describe alternative

SELECT
  table_name,
  column_name,
  data_type,
  character_maximum_length
FROM
  INFORMATION_SCHEMA.COLUMNS
WHERE
  1 = 1
  AND table_schema IN ('schema1','schema2')
  AND (table_name LIKE 'c\_%' OR table_name LIKE 'd\_%')
  AND column_name = 'order'
ORDER BY
  table_name,
  column_name

Create Table Like

# without contents
CREATE TABLE myNewTable AS SELECT * FROM myTable WHERE 1=2
# without contents but including index etc (but no permissions)
CREATE TABLE myNewTable (like myTable including all);

Performance Tuning

use this to check the SQL speed:

EXPLAIN (analyze,buffers,timing) 
SELECT * FROM table WHERE ...

EXISTS

SELECT 1 WHERE EXISTS ( SELECT 1 FROM table WHERE field = :field )
--Is much faster than performing a COUNT(*) and later check if greater than 0

UNION vs UNION ALL

UNION does include a DISTINCT, while UNION ALL does not, so if you know the data is unique, use UNION ALL

SELECT WHERE IN () Statement

when passing a long list of values into a statement like

SELECT * FROM myTable where id in (1,2,3,4,5,6,7,8,...);

the statement becomes very slow, see [5]

Better use a WITH clause (=CTE = common table expressions):

WITH value_list AS (
  VALUES (1),(2),...
)
SELECT * FROM myTable WHERE id IN (SELECT * FROM value_list);

DELETE Statement

from [6]

Good: DELETE FROM foo WHERE id IN (select id from rows_to_delete);
Bad:  DELETE FROM foo WHERE id NOT IN (select id from rows_to_keep);
Good Alternative for the bad rows_to_keep path:
DELETE FROM foo 
 WHERE id IN (
  SELECT f.id FROM foo f 
  LEFT JOIN rows_to_keep k ON f.id = k.id
             WHERE k.id IS NULL
 );

delete based on multiple criteria also in another table

DELETE FROM
  table1 t1
WHERE
  EXISTS (
  SELECT
    1
  FROM
    table2 t2
  WHERE
    1 = 1
    AND t2.date < CURRENT_DATE - 30
    AND t1.col1 = t2.col1
    AND t1.col2 = t2.col2 )

Admin

SELECT version();

PostgreSQL Monitoring Cheatsheet

see [7]

Vacuum / Autovacuum

Manual

VACUUM VERBOSE ANALYZE myTable;

or with exclusive lock:

VACUUM FULL VERBOSE ANALYZE myTable;

AutoVacuum

SELECT name, setting FROM pg_settings WHERE name='autovacuum';
-- should be on
SELECT * from pg_settings where category like 'Autovacuum';
--overview by 
SELECT schemaname, relname, last_autoanalyze, last_analyze, last_autovacuum, last_vacuum, n_live_tup, n_dead_tup,*
FROM "pg_catalog"."pg_stat_all_tables"
WHERE schemaname = 'mySchema' 
ORDER BY last_autoanalyze DESC

Performance Analysis

List of currently running Queries

SELECT pid, datname, usename, state, query, query_start, now()-query_start AS runtime, client_addr, *
FROM pg_stat_activity
WHERE 1=1
ORDER BY query_start ASC ;

pg_stat_statements module

pg_stat_statements module is very nice, as it displays which queries run how often and how long, see for example [8]
Installation

  • postgresql.conf
in : section # - Kernel Resource Usage -
add: shared_preload_libraries = 'pg_stat_statements'
  • restart PostgreSQL
CREATE EXTENSION pg_stat_statements
(in each DB)
  • check in available extensions via
SELECT * FROM pg_available_extensions WHERE name = 'pg_stat_statements'

finally run

SELECT 
  (total_time / 1000 / 3600) as total_hours,
  (total_time / 1000 / calls) as avg_sec, 
  calls num_calls,
  query
FROM pg_stat_statements 
WHERE 1=1
AND query != '<insufficient privilege>' -- only by current user
-- AND lower(query) LIKE '%select *%join%'
ORDER BY 2 DESC LIMIT 100;
  • Grant access to non-admin users:
GRANT USAGE ON SCHEMA public TO myUser;
GRANT SELECT ON pg_stat_statements TO myUser;
  • Reset stats (as admin user)
SELECT pg_stat_statements_reset()

Check/analyze why a certain statement runs slow

from [9]

EXPLAIN (analyze,buffers,timing) 
 DELETE FROM myTable WHERE id IN (
  SELECT id FROM myTableToDelete
) AND id > 2150 
  AND id < 2200 ;

Find and Delete Locks

Display PIDs and Queries running on a table, from [10]

SELECT pid, state, usename, query, query_start 
  FROM pg_stat_activity 
 WHERE pid IN (
  SELECT pid FROM pg_locks l 
    JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r' 
   WHERE t.relname = 'myTable'
 );

Cancel PID (soft)

SELECT pg_cancel_backend(12345); 

Cancel PID (force)

SELECT pg_terminate_backend(12345);

Backup and Restore

Backup

pg_dump.exe -h localhost -p 5432 -U myUser --password --schema=mySchema --Format c --blobs --verbose -f myDB.dmp myDB

Restore (as admin user postgres)

ALTER SCHEMA "myDB" RENAME TO "myDB.old";
pg_restore  -h localhost -p 5432 -U postgres --password --create --dbname myDB --verbose --exit-on-error myDB.dmp
DROP SCHEMA "myDB.old" CASCADE;

Disconnect all sessions

SELECT	pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE pg_stat_activity.datname = 'myDB'
  AND pid <> pg_backend_pid();

Drop and recreate DB via psql

psql --port=5432 --username=myUser --password
DROP database IF EXISTS myDB;
CREATE database myDB;
\c myDB;
DROP SCHEMA IF EXISTS public;
CREATE SCHEMA public;
\q