PostgreSQL


date handling

SELECT date_trunc('hour', t.created) AS hour FROM myTable t;
SELECT date_trunc('month', t.created)::date AS month FROM myTable t; -- convert datetime to date
SELECT * FROM myTable WHERE created > '2020-06-22 00:00:00+02'
SELECT * FROM myTable WHERE created > current_timestamp - '3 hours'::interval
SELECT * FROM myTable WHERE created > CURRENT_DATE - 91  -- days
SELECT * FROM myTable WHERE created > now() at time zone 'utc' - interval '10 minute'
-- generate a list of dates
SELECT i::date AS day FROM generate_series('2020-01-01', CURRENT_DATE, '1 day'::interval) i
-- extract hour or day-of-week from a date
SELECT EXTRACT(hour FROM datecol) FROM myTable
SELECT EXTRACT(isodow FROM datecol) FROM myTable
-- day-of-week filter (DOW: 0 = Sunday ... 6 = Saturday)
SELECT date_trunc('day',created)::date, count (*) FROM myTable
WHERE EXTRACT(DOW FROM created) = 0
GROUP BY 1 ORDER BY 1 DESC; 


Group by date and add missing days with zero count

WITH all_days AS (
    SELECT generate_series('2023-01-01', current_date, '1 day') :: date AS date
), count_per_day AS (
    SELECT
        created :: date AS date,
        COUNT(*) AS count
    FROM my_table
    GROUP BY 1
)
SELECT
    all_days.date,
    COALESCE (count, 0) AS count
FROM all_days
    LEFT OUTER JOIN count_per_day ON all_days.date = count_per_day.date
ORDER BY
    1 DESC;

date formatting

see [1]

SELECT to_char(last_login, 'DD.MM.YYYY') as last_login FROM mytable
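
The reverse direction (string to date) works via to_date with the same pattern syntax:

SELECT to_date('24.12.2023', 'DD.MM.YYYY');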

convert/truncate date to month/week etc. (::date casts datetime to date)

SELECT date_trunc('week', import_date)::date, count (production_order)
  FROM cosp.c_upload_sap_order
  group by 1
  order by 1 desc

cast types

select t.id, 'matched filter1'::character varying(16) from myTable t

-- 1. convert timerange to seconds
-- 2. perform average
-- 3. cast to numeric to allow for rounding
SELECT round(CAST(EXTRACT(EPOCH FROM avg(s.closure_date - s.date_of_start)) / 3600 AS numeric), 1) AS avg_hours_total_avg FROM myTable s GROUP BY column1

-- convert a timestamp difference (interval) to days; note: date - date already yields an integer number of days
SELECT EXTRACT (DAY FROM (current_date - date_due)) AS days FROM myTable

leading zeros for integer values

select TO_CHAR(plant_id, 'fm0000') from myTable

rounding of REALS

SELECT round(lower_specification_limit::numeric, 6) AS lower_specification_limit FROM myTable;

Select

EXCEPT (=MINUS in Oracle)

from [2]

SELECT column_list
FROM A
WHERE condition_a
EXCEPT 
SELECT column_list
FROM B
WHERE condition_b;

Daily sum and increase vs. previous row / LAG

WITH daily_sum AS (
SELECT created, sum(count_finished)
FROM c_report_timeseries
WHERE department_id > 0 
GROUP BY 1
ORDER BY 1
)
SELECT created, sum, sum - lag(sum) over (order by created) as increase
FROM daily_sum;

Grouping and stats: min, max, avg, median

select d.name "Department",
count(*) ,
min(i.modified-i.created),
max(i.modified-i.created),
avg(i.modified-i.created),
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY (i.modified-i.created) ) as median 
FROM myTable i
JOIN department d ON d.id = i.department_id -- table and join names are placeholders
GROUP BY d.name

Substring

-- 087249731000011 -> 087249731-0000-11
SELECT id, plant_id, material_number
, substring (material_number FROM 1 for 9) || '-' || substring (material_number FROM 10 for 4) || '-' || substring (material_number FROM 14 for 2) AS "MatNoNew"
FROM d_template t 
WHERE 1=1
AND t.department_id = 50012487
AND material_number NOT LIKE '%-%'
AND length(material_number) = 15
;

sequence.nextval

SELECT nextval('mysequence');
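
Related sequence functions (currval only works after a nextval call in the same session):

SELECT currval('mysequence');
SELECT setval('mysequence', 1000); -- manually set the sequence value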

Pivot / long table to wide table

WITH data AS (
    SELECT
    date_trunc('week', created)::date AS "Date",
    myType,
    count (*)
    FROM myTable
    GROUP BY 1,2
)
SELECT "Date",
max("count") FILTER (WHERE myType= 'D') AS "cntD",
max("count") FILTER (WHERE myType= 'S') AS "cntS
max("count") FILTER (WHERE myType= 'M') AS "cntM"
FROM data
GROUP BY 1
ORDER BY 1 ASC

Weighted Average

WITH my_table AS (
SELECT id, property, val1, val2, val3,
CASE
  WHEN created > current_date - 365 THEN 3
  WHEN created > current_date - 730 THEN 2
  ELSE 1
END AS weighting_factor
FROM my_source_table -- source table is a placeholder
)
SELECT
sum(val1*weighting_factor)/sum(weighting_factor) AS "val1w",
sum(val2*weighting_factor)/sum(weighting_factor) AS "val2w",
sum(val3*weighting_factor)/sum(weighting_factor) AS "val3w"
FROM my_table
GROUP BY property

Functions in plpgsql

CREATE OR REPLACE FUNCTION drm.tmp_change_plant_id(id_old int, id_new int)
 RETURNS void AS $$   
BEGIN

UPDATE myTable SET plant_id = id_new WHERE plant_id = id_old;

END;
$$ LANGUAGE plpgsql;

Insert Update

Insert with conflicting IDs

insert into machine_group(id, area, number)
values (6, 6, 6),
       (7, 6, 7),
       (8, 6, 8),
       (9, 6, 9),
       (10, 6, 10)
on conflict (id) do nothing;
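
A sketch of the DO UPDATE variant (upsert); EXCLUDED refers to the row that was proposed for insertion:

insert into machine_group(id, area, number)
values (6, 6, 6)
on conflict (id) do update
set area = EXCLUDED.area, number = EXCLUDED.number;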

Users

grant permission to user

-- as user myuser
GRANT USAGE ON SCHEMA myschema TO myuser2;
GRANT SELECT, INSERT ON TABLE mytable TO myuser2;

read-only user

CREATE USER myUser PASSWORD 'myPassword';
GRANT USAGE ON SCHEMA mySchema TO myUser;
GRANT SELECT ON ALL TABLES IN SCHEMA mySchema TO myUser;

-- needed for views that are added later
ALTER DEFAULT PRIVILEGES IN SCHEMA mySchema GRANT SELECT ON TABLES TO myUser;

Disable user

ALTER USER myuser WITH NOLOGIN;
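
Re-enable later via:

ALTER USER myuser WITH LOGIN;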

Change password

ALTER USER user_name WITH PASSWORD 'new_password';

Schema Ownership

GRANT myuser TO postgresadmin;
ALTER SCHEMA myschema OWNER TO myuser;
GRANT ALL ON SCHEMA myschema TO myuser;
REVOKE myuser FROM postgresadmin;

Other

REPLACE and REGEXP_REPLACE

-- replace the HTML entity &amp; with &
UPDATE mytable
SET remark = REPLACE(remark, '&amp;', '&')
WHERE remark LIKE '%&amp;%'

-- replace _|_ with a linebreak (chr(10) = \n)
UPDATE mytable
SET remark = REPLACE(remark, '_|_', chr(10) )
WHERE remark LIKE '%\_|\_%' -- _ is a LIKE wildcard and needs escaping

sprintf-alternative

goal

sprintf 'u%07d', 1

achieved via

SELECT replace( format('u%7s', 1), ' ', '0');
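
An alternative without the replace trick is lpad:

SELECT 'u' || lpad(1::text, 7, '0'); -- u0000001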

random item of list

SELECT (array['Yes', 'No', 'Maybe'])[floor(random() * 3 + 1)];

UPDATE via SELECT

update single column from another table

UPDATE q_user
SET    card_no = q_upload_user.card_no
FROM   q_upload_user  
WHERE  q_user.id = q_upload_user.id

using WITH temp table

WITH map_name_id AS 
(
SELECT short_name, min(id) min_id
 FROM   q_upload_organization
 GROUP BY short_name 
)
UPDATE q_upload_user2
SET    department_id = map_name_id.min_id
FROM   map_name_id  
WHERE  q_upload_user2.dpt_short_name_sap = map_name_id.short_name

sync table tab1 from tab2

UPDATE tab1
SET (group_id, error_de, error_en, error_hu) = (
 SELECT group_id, error_de, error_en, error_hu FROM tab2 WHERE tab1.code = tab2.code
)

pseudonymization

UPDATE "user" -- user is a reserved word and needs quoting
SET
username = replace( format('u%7s', id), ' ', '0') -- u0000001 for id=1
,first_name = (array['Max', 'Egon', 'Lisa'])[floor(random() * 3 + 1)]
,last_name = (array['Meier', 'Müller', 'Schmidt'])[floor(random() * 3 + 1)]
,email = 'no-mail@schaeffler.com'
WHERE id = 1;

Count the occurrences of a substring in a string

solution based on [3] and [4]

SELECT
 (length(str) - length(replace(str, replacestr, '')) )::int
 / length(replacestr)
FROM ( VALUES
 ('foobarbaz', 'ba')
) AS t(str, replacestr);

becomes

SELECT
  sub1.id, 
  (length(sub1.str) - length(replace(sub1.str, sub1.replacestr, '')) )::int / length(sub1.replacestr) AS times
FROM
(
SELECT id, checklist AS str, '{"id":'::text AS replacestr FROM d_department
) sub1

which counts the number of (JSON) ids '{"id":' in a text field

Get table column type / describe alternative

SELECT
  table_name,
  column_name,
  data_type,
  character_maximum_length
FROM
  INFORMATION_SCHEMA.COLUMNS
WHERE
  1 = 1
  AND table_schema IN ('schema1','schema2')
  AND (table_name LIKE 'c\_%' OR table_name LIKE 'd\_%')
  AND column_name = 'order'
ORDER BY
  table_name,
  column_name

Create Table Like

-- without contents
CREATE TABLE myNewTable AS SELECT * FROM myTable WHERE 1=2;
-- without contents but including indexes etc. (but no permissions)
CREATE TABLE myNewTable (like myTable including all);
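
To copy the contents as well (again without indexes and permissions):

CREATE TABLE myNewTable AS SELECT * FROM myTable;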

Concatenate / string_agg columns of multiple rows

SELECT id, STRING_AGG(c.comment, '
-----
' ORDER BY c.created ASC) AS comment
FROM comment c
GROUP BY id

Performance Tuning

use this to check the SQL speed:

EXPLAIN (analyze,buffers,timing) 
SELECT * FROM myTable WHERE ...

EXISTS

SELECT 1 WHERE EXISTS ( SELECT 1 FROM myTable WHERE field = :field )
-- much faster than performing a COUNT(*) and then checking whether it is greater than 0
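
For comparison, the slower COUNT(*) pattern (same placeholder names):

SELECT count(*) > 0 FROM myTable WHERE field = :field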

UNION vs UNION ALL

UNION includes an implicit DISTINCT (duplicate elimination), while UNION ALL does not; if you know the data is unique, use UNION ALL
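
A minimal sketch of the difference:

SELECT 1 AS x UNION     SELECT 1; -- one row, duplicates removed
SELECT 1 AS x UNION ALL SELECT 1; -- two rows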

SELECT WHERE IN () Statement

when passing a long list of values into a statement like

SELECT * FROM myTable where id in (1,2,3,4,5,6,7,8,...);

the statement becomes very slow, see [5]

Better to use a WITH clause (CTE = common table expression):

WITH value_list AS (
  VALUES (1),(2),...
)
SELECT * FROM myTable WHERE id IN (SELECT * FROM value_list);
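
Another option is to pass the values as a single array, which avoids both the long IN list and the CTE:

SELECT * FROM myTable WHERE id = ANY(ARRAY[1,2,3]);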

DELETE Statement

from [6]

Good: DELETE FROM foo WHERE id IN (select id from rows_to_delete);
Bad:  DELETE FROM foo WHERE id NOT IN (select id from rows_to_keep);
Good alternative for the bad rows_to_keep case:
DELETE FROM foo
 WHERE id IN (
  SELECT f.id FROM foo f
  LEFT JOIN rows_to_keep k ON f.id = k.id
  WHERE k.id IS NULL
 );

delete based on multiple criteria matched in another table

DELETE FROM
  table1 t1
WHERE
  EXISTS (
  SELECT
    1
  FROM
    table2 t2
  WHERE
    1 = 1
    AND t2.date < CURRENT_DATE - 30
    AND t1.col1 = t2.col1
    AND t1.col2 = t2.col2 )

Find Missing Indexes on Foreign Keys

from https://www.cybertec-postgresql.com/en/index-your-foreign-key/

SELECT c.conrelid::regclass AS "table",
       /* list of key column names in order */
       string_agg(a.attname, ',' ORDER BY x.n) AS columns,
       pg_catalog.pg_size_pretty(
          pg_catalog.pg_relation_size(c.conrelid)
       ) AS size,
       c.conname AS constraint,
       c.confrelid::regclass AS referenced_table
FROM pg_catalog.pg_constraint c
   /* enumerated key column numbers per foreign key */
   CROSS JOIN LATERAL
      unnest(c.conkey) WITH ORDINALITY AS x(attnum, n)
   /* name for each key column */
   JOIN pg_catalog.pg_attribute a
      ON a.attnum = x.attnum
         AND a.attrelid = c.conrelid
WHERE NOT EXISTS
        /* is there a matching index for the constraint? */
        (SELECT 1 FROM pg_catalog.pg_index i
         WHERE i.indrelid = c.conrelid
           /* it must not be a partial index */
           AND i.indpred IS NULL
           /* the first index columns must be the same as the
              key columns, but order doesn't matter */
           AND (i.indkey::smallint[])[0:cardinality(c.conkey)-1]
               OPERATOR(pg_catalog.@>) c.conkey)
  AND c.contype = 'f'
GROUP BY c.conrelid, c.conname, c.confrelid
ORDER BY pg_catalog.pg_relation_size(c.conrelid) DESC;

Admin

SELECT version();

PostgreSQL Monitoring Cheatsheet

see [7]

Vacuum / Autovacuum

Manual

VACUUM VERBOSE ANALYZE myTable;

or with exclusive lock:

VACUUM FULL VERBOSE ANALYZE myTable;

AutoVacuum

SELECT name, setting FROM pg_settings WHERE name='autovacuum';
-- should be on
SELECT * from pg_settings where category like 'Autovacuum';
-- overview:
SELECT schemaname, relname, last_autoanalyze, last_analyze, last_autovacuum, last_vacuum, n_live_tup, n_dead_tup,*
FROM "pg_catalog"."pg_stat_all_tables"
WHERE schemaname = 'mySchema' 
ORDER BY last_autoanalyze DESC

Performance Analysis

List of currently running Queries

SELECT pid, datname, usename, state, query, query_start, now()-query_start AS runtime, client_addr, *
FROM pg_stat_activity
WHERE 1=1
ORDER BY query_start ASC ;

pg_stat_statements module

The pg_stat_statements module is very useful, as it shows which queries run how often and for how long, see for example [8]
Installation

  • postgresql.conf
in the section "# - Kernel Resource Usage -"
add: shared_preload_libraries = 'pg_stat_statements'
  • restart PostgreSQL
CREATE EXTENSION pg_stat_statements
(in each DB)
  • check available extensions via
SELECT * FROM pg_available_extensions WHERE name = 'pg_stat_statements'

finally run (recent PostgreSQL versions)

SELECT 
  round((total_exec_time / 1000 / 3600)::numeric,2) as total_hours,
  round((total_exec_time / 1000 / calls)::numeric,3) as avg_sec,
  calls num_calls,
  query
FROM pg_stat_statements 
WHERE 1=1
AND query != '<insufficient privilege>' -- only by current user
-- AND lower(query) LIKE '%select *%join%'
ORDER BY 2 DESC LIMIT 100;

finally run (older PostgreSQL versions, which use total_time instead of total_exec_time)

SELECT 
  round((total_time / 1000 / 3600)::numeric,2) as total_hours,
  round((total_time / 1000 / calls)::numeric,3) as avg_sec,
  calls num_calls,
  query
FROM pg_stat_statements 
WHERE 1=1
AND query != '<insufficient privilege>' -- only by current user
-- AND lower(query) LIKE '%select *%join%'
ORDER BY 2 DESC LIMIT 100;
  • Grant access to non-admin users:
GRANT USAGE ON SCHEMA public TO myUser;
GRANT SELECT ON pg_stat_statements TO myUser;
  • Reset stats (as admin user)
SELECT pg_stat_statements_reset()

Check/analyze why a certain statement runs slow

from [9]

EXPLAIN (analyze,buffers,timing) 
 DELETE FROM myTable WHERE id IN (
  SELECT id FROM myTableToDelete
) AND id > 2150 
  AND id < 2200 ;


EXPLAIN ANALYZE
SELECT id FROM myTableToDelete

analyze via

psql -qAt -f explain.sql > analyze.json
http://tatiyants.com/pev/#/plans/new
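
pev expects the plan in JSON format, so explain.sql should wrap the statement accordingly, e.g.:

EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM myTable WHERE ...;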

Find and Delete Locks

GRANT pg_signal_backend TO postgresadmin WITH ADMIN OPTION;
SELECT pg_terminate_backend(pid)
    FROM pg_stat_activity
    WHERE pid <> pg_backend_pid();


Display PIDs and Queries running on a table, from [10]

SELECT pid, state, usename, query, query_start 
  FROM pg_stat_activity 
 WHERE pid IN (
  SELECT pid FROM pg_locks l 
    JOIN pg_class t ON l.relation = t.oid AND t.relkind = 'r' 
   WHERE t.relname = 'myTable'
 );

Cancel PID (soft)

SELECT pg_cancel_backend(12345); 

Cancel PID (force)

SELECT pg_terminate_backend(12345);

Backup and Restore

Backup

SET PGPASSWORD=myPassword
pg_dump.exe --host=localhost --port=5432 --username=myUser --dbname=myDB --schema=mySchema --format=c --blobs --verbose --file myDB.dmp

Restore (as admin user postgres)

ALTER SCHEMA "myDB" RENAME TO "myDB.old";
SET PGPASSWORD=myPassword
pg_restore --host=localhost --port=5432 --username=postgres --create --dbname myDB --verbose --exit-on-error myDB.dmp
DROP SCHEMA "myDB.old" CASCADE;

Disconnect all sessions

SELECT pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE pg_stat_activity.datname = 'myDB'
  AND pid <> pg_backend_pid();

Drop and recreate DB via psql

SET PGPASSWORD=myPassword
psql --port=5432 --username=myUser
DROP database IF EXISTS myDB;
CREATE database myDB;
\c myDB;
DROP SCHEMA IF EXISTS public;
CREATE SCHEMA public;
\q

Export and import single table

SET PGPASSWORD=myPassword

# export a single report table
# pg_dump / pg_restore is problematic here, since it requires the same schema on the target DB, hence COPY is used instead
# psql has no --schema option, hence the schema-qualified table name
psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
--command="COPY mySchema.myTable TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv
# or
psql.exe --host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
--command="COPY (SELECT id,name FROM mySchema.myTable) TO STDOUT DELIMITER '|' CSV HEADER;" > myTable.csv

# import using COPY
psql.exe ^
--host=myHost1 --port=5432 --username=myUser --dbname=myDB ^
-v ON_ERROR_STOP=1 ^
--command="TRUNCATE TABLE myTable; COPY myTable FROM STDIN DELIMITER '|' CSV HEADER;" < myTable.csv