A report of all the data contained into a postgres schema
Published On May 24, 2016
Need a simple and effective way read the informative content of a postgres schema?
You can use the following procedure that creates for every table, for every attribute a row.
The row contains:
- table_schema: the name of the schema,
- table_name: the name of the table,
- table_attribute: the name of the attribute,
- table_attribute_type: the type of the attribute in both declared and internal representation
- record_number: number of the record in the table,
- distinct_values: number of distinct values for the attributes,
- min_value text: minimum value of the attribute (null not counted) ,
- max_value text:maximum value of the attribute (null not counted),
- null_count: number of null occurences for the attributes,
- not_null_count: number of not null occurences for the attributes
DROP FUNCTION IF EXISTS schema_report(text);
CREATE OR REPLACE FUNCTION schema_report(the_schema text)
RETURNS TABLE(table_schema text, table_name text, table_attribute text, table_attribute_type text,record_number integer, distinct_values integer, min_value text, max_value text, null_count integer, not_null_count integer) AS
$BODY$
DECLARE
get_columns_query text;
get_columns_record record;
report_query text;
BEGIN
get_columns_query := '';
get_columns_query := get_columns_query || 'SELECT c.table_schema, c.table_name, c.column_name, c.data_type, c.udt_name ';
get_columns_query := get_columns_query || 'FROM information_schema.tables t LEFT JOIN information_schema.columns c ON t.table_schema = c.table_schema AND t.table_name=c.table_name ';
get_columns_query := get_columns_query || 'WHERE t.table_schema = ''' || the_schema || ''' AND t.table_type = ''BASE TABLE'' ';
get_columns_query := get_columns_query || 'ORDER BY c.table_schema, c.table_name, c.ordinal_position';
report_query := '';
RAISE NOTICE 'collecting informations' ;
FOR get_columns_record IN EXECUTE get_columns_query
LOOP
report_query := report_query || 'SELECT ''' || get_columns_record.table_schema || '''::text as table_schema, ';
report_query := report_query || '''' || get_columns_record.table_name || '''::text as table_name, ' ;
report_query := report_query || '''' || get_columns_record.column_name || '''::text as table_attribute, ';
report_query := report_query || '''' || get_columns_record.data_type || '/' || get_columns_record.udt_name || '''::text as table_attribute_type, ';
report_query := report_query || 'count(*)::integer as record_number, ';
report_query := report_query || 'count(distinct ' || get_columns_record.column_name || ')::integer as distinct_values, ' ;
if get_columns_record.data_type = 'boolean' then
report_query := report_query || 'min(' || get_columns_record.column_name || '::text)::text as min_value, ';
report_query := report_query || 'max(' || get_columns_record.column_name || '::text)::text as max_value, ';
elsif get_columns_record.udt_name = 'hstore' then
report_query := report_query || '''hstore not supported''::text as min_value, ';
report_query := report_query || '''hstore not supported''::text as max_value, ';
else
report_query := report_query || 'min(' || get_columns_record.column_name || ')::text as min_value, ';
report_query := report_query || 'max(' || get_columns_record.column_name || ')::text as max_value, ';
end if;
report_query := report_query || 'sum(case when ' || get_columns_record.column_name || ' is null then 1 else 0 end )::integer as null_count, ';
report_query := report_query || 'sum(case when ' || get_columns_record.column_name || ' is null then 0 else 1 end )::integer as not_null_count ';
report_query := report_query || 'FROM ' || get_columns_record.table_schema || '.' || get_columns_record.table_name || ' ';
report_query := report_query || 'UNION ALL ';
END LOOP;
report_query := left(report_query,-10);
RAISE NOTICE 'executing report query' ;
RETURN QUERY EXECUTE report_query ;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 1000
ROWS 1000;
select *
from schema_report('public');
The execution of the procedure is a quite long task, it could be reasonable to materialize a view…
Have you found a better approach? I’m open to your suggestions.
Tags: postgres sql dba report