LineString Merging Updates (#1538)

This PR addresses two main issues introduced by the new ID-Based Merged-LineString Updates

1. Partial Indexes can only be accessed when the query matches more or less exactly and the query-planner will fail to use indexes when targeted via the join-condition and not the where-condition
    - `osm_transportation_merge_linestring_gen_z9`
    - `osm_transportation_name_network`
    - `osm_shipway_linestring`
    - `osm_aerialway_linestring`
    - `osm_waterway_linestring`
2. When intersecting updated Source-LineStrings with the existing Merged-LineStrings we join the Source-IDs of each existing Merged-LineString. This bloats the table unnecessarily and slows down bigger updates considerably.
    - This is addressed by aggregating the Source-IDs of each existing Merged-LineString into an array and concatinating these arrays when grouping them. Afterwards we add the IDs of updated SourceLineStrings and deduplicate the result before adding it to the Source-IDs-Table.
This commit is contained in:
benedikt-brandtner-bikemap
2023-06-19 10:12:57 +02:00
committed by GitHub
parent d8a264cd0c
commit 66731f3544
3 changed files with 230 additions and 138 deletions

View File

@@ -304,6 +304,12 @@ CREATE INDEX IF NOT EXISTS osm_important_waterway_linestring_gen_z9_geometry_idx
-- -- osm_important_waterway_linestring -> osm_important_waterway_linestring_gen_z10
-- -- osm_important_waterway_linestring -> osm_important_waterway_linestring_gen_z9
CREATE OR REPLACE AGGREGATE array_cat_agg(anycompatiblearray) (
SFUNC=array_cat,
STYPE=anycompatiblearray,
INITCOND = '{}'
);
CREATE TABLE IF NOT EXISTS waterway_important.changes
(
osm_id bigint,
@@ -403,7 +409,8 @@ BEGIN
-- Create a table containing all LineStrings which should be merged
CREATE TEMPORARY TABLE linestrings_to_merge AS
-- Add all Source-LineStrings affected by this update
SELECT osm_id, NULL::INTEGER AS id, geometry, name, name_en, name_de, slice_language_tags(tags) as tags
SELECT osm_id, NULL::INTEGER AS id, NULL::BIGINT[] AS source_ids, geometry, name, name_en, name_de,
slice_language_tags(tags) as tags
-- Table containing the IDs of all Source-LineStrings affected by this update
FROM (
-- Get Source-LineString-IDs of deleted or updated elements
@@ -414,23 +421,25 @@ BEGIN
ORDER BY source_id
) affected_source_linestrings
JOIN osm_waterway_linestring ON (
affected_source_linestrings.source_id = osm_waterway_linestring.osm_id AND
name <> '' AND waterway = 'river' AND ST_IsValid(geometry)
);
affected_source_linestrings.source_id = osm_waterway_linestring.osm_id
)
WHERE name <> '' AND waterway = 'river' AND ST_IsValid(geometry);
-- Drop temporary tables early to save resources
DROP TABLE affected_merged_linestrings;
-- Create index on geometry column and analyze the created table to speed up subsequent queries
CREATE INDEX ON linestrings_to_merge USING GIST (geometry);
-- Analyze the created table to speed up subsequent queries
ANALYZE linestrings_to_merge;
-- Add all Merged-LineStrings intersecting with Source-LineStrings affected by this update
INSERT INTO linestrings_to_merge
SELECT s.source_id AS osm_id, m.id, geometry, name, name_en, name_de, tags
FROM osm_important_waterway_linestring m
JOIN osm_important_waterway_linestring_source_ids s ON (m.id = s.id)
WHERE EXISTS(SELECT NULL FROM linestrings_to_merge WHERE ST_Intersects(linestrings_to_merge.geometry, m.geometry));
SELECT NULL::BIGINT AS osm_id, m.id,
ARRAY(
SELECT s.source_id FROM osm_important_waterway_linestring_source_ids s WHERE s.id = m.id
)::BIGINT[] AS source_ids,
m.geometry, m.name, m.name_en, m.name_de, m.tags
FROM linestrings_to_merge
JOIN osm_important_waterway_linestring m ON (ST_Intersects(linestrings_to_merge.geometry, m.geometry));
-- Analyze the created table to speed up subsequent queries
ANALYZE linestrings_to_merge;
@@ -467,13 +476,15 @@ BEGIN
CREATE INDEX ON clustered_linestrings_to_merge (cluster_group, cluster);
ANALYZE clustered_linestrings_to_merge;
-- Create temporary Merged-LineString to Source-LineStrings-ID column to store relations before they have been
-- Create temporary Merged-LineString to Source-LineStrings-ID columns to store relations before they have been
-- intersected
ALTER TABLE osm_important_waterway_linestring ADD COLUMN IF NOT EXISTS source_ids bigint[];
ALTER TABLE osm_important_waterway_linestring ADD COLUMN IF NOT EXISTS new_source_ids BIGINT[];
ALTER TABLE osm_important_waterway_linestring ADD COLUMN IF NOT EXISTS old_source_ids BIGINT[];
WITH inserted_linestrings AS (
-- Merge LineStrings of each cluster and insert them
INSERT INTO osm_important_waterway_linestring (geometry, source_ids, name, name_en, name_de, tags)
INSERT INTO osm_important_waterway_linestring (geometry, new_source_ids, old_source_ids, name, name_en, name_de,
tags)
SELECT (ST_Dump(ST_LineMerge(ST_Union(geometry)))).geom AS geometry,
-- We use St_Union instead of St_Collect to ensure no overlapping points exist within the geometries
-- to merge. https://postgis.net/docs/ST_Union.html
@@ -482,14 +493,15 @@ BEGIN
-- https://postgis.net/docs/ST_LineMerge.html
-- In order to not end up with a mixture of LineStrings and MultiLineStrings we dump eventual
-- MultiLineStrings via ST_Dump. https://postgis.net/docs/ST_Dump.html
array_agg(osm_id) as source_ids,
coalesce( array_agg(osm_id) FILTER (WHERE osm_id IS NOT NULL), '{}' )::BIGINT[] AS new_source_ids,
array_cat_agg(source_ids)::BIGINT[] as old_source_ids,
name,
name_en,
name_de,
tags
FROM clustered_linestrings_to_merge
GROUP BY cluster_group, cluster, name, name_en, name_de, tags
RETURNING id, source_ids, geometry
RETURNING id, new_source_ids, old_source_ids, geometry
)
-- Store OSM-IDs of Source-LineStrings by intersecting Merged-LineStrings with their sources.
-- This is required because ST_LineMerge only merges across singular intersections and groups its output into a
@@ -497,8 +509,14 @@ BEGIN
INSERT INTO osm_important_waterway_linestring_source_ids (id, source_id)
SELECT m.id, source_id
FROM (
SELECT id, unnest(source_ids) AS source_id, geometry
SELECT id, source_id, geometry
FROM inserted_linestrings
CROSS JOIN LATERAL (
SELECT DISTINCT all_source_ids.source_id
FROM unnest(
array_cat(inserted_linestrings.new_source_ids, inserted_linestrings.old_source_ids)
) AS all_source_ids(source_id)
) source_ids
) m
JOIN osm_waterway_linestring s ON (m.source_id = s.osm_id)
WHERE ST_Intersects(s.geometry, m.geometry)
@@ -507,8 +525,9 @@ BEGIN
-- Cleanup remaining table
DROP TABLE clustered_linestrings_to_merge;
-- Drop temporary Merged-LineString to Source-LineStrings-ID column
ALTER TABLE osm_important_waterway_linestring DROP COLUMN IF EXISTS source_ids;
-- Drop temporary Merged-LineString to Source-LineStrings-ID columns
ALTER TABLE osm_important_waterway_linestring DROP COLUMN IF EXISTS new_source_ids;
ALTER TABLE osm_important_waterway_linestring DROP COLUMN IF EXISTS old_source_ids;
-- noinspection SqlWithoutWhere
DELETE FROM waterway_important.changes;