Skip to content

Commit

Permalink
Merge pull request #4280 from rouault/fast_proj_db_build
Browse files Browse the repository at this point in the history
Speed-up dramatically proj.db build time.
  • Loading branch information
rouault authored Oct 22, 2024
2 parents 54a7662 + 842240d commit 98974da
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 200 deletions.
4 changes: 3 additions & 1 deletion data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ set(ALL_SQL_IN "${CMAKE_CURRENT_BINARY_DIR}/all.sql.in")
set(PROJ_DB "${CMAKE_CURRENT_BINARY_DIR}/proj.db")
include(sql_filelist.cmake)

set(PROJ_DB_SQL_EXPECTED_MD5 "9a6b21de7b18f68719acb2260c3492fb")

add_custom_command(
OUTPUT ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} -E remove -f ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}"
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}" "-DPROJ_DB_SQL_EXPECTED_MD5=${PROJ_DB_SQL_EXPECTED_MD5}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
COMMAND ${CMAKE_COMMAND} -E copy ${PROJ_DB} ${CMAKE_CURRENT_BINARY_DIR}/for_tests
DEPENDS ${SQL_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
Expand Down
52 changes: 42 additions & 10 deletions data/generate_proj_db.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,48 @@ function(cat IN_FILE OUT_FILE)
file(APPEND ${OUT_FILE} "${CONTENTS}")
endfunction()

file(WRITE "${ALL_SQL_IN}" "")
include(sql_filelist.cmake)
foreach(SQL_FILE ${SQL_FILES})
cat(${SQL_FILE} "${ALL_SQL_IN}")
endforeach()

# Do ${PROJ_VERSION} substitution
file(READ ${ALL_SQL_IN} CONTENTS)
string(REPLACE "\${PROJ_VERSION}" "${PROJ_VERSION}" CONTENTS_MOD "${CONTENTS}")
file(WRITE "${ALL_SQL_IN}" "${CONTENTS_MOD}")
# Generates an all.sql.in file from all the individual .sql files, taking
# into account if extra validation checks must be done before inserting data
# records.
#
# Arguments:
#   ALL_SQL_IN_FILENAME - path of the concatenated SQL file to (re)create.
#   EXTRA_VALIDATION    - value copied into PROJ_DB_EXTRA_VALIDATION before
#                         sql_filelist.cmake is included (presumably that file
#                         uses it to decide which .sql files to list - confirm
#                         against sql_filelist.cmake).
#   OUT_MD5             - name of the variable, in the caller's scope, that
#                         receives the MD5 of the concatenated file.
#
# Side effects: overwrites ALL_SQL_IN_FILENAME and writes a
# "<ALL_SQL_IN_FILENAME>.tmp" scratch copy next to it.
function(generate_all_sql_in ALL_SQL_IN_FILENAME EXTRA_VALIDATION OUT_MD5)
  set(PROJ_DB_EXTRA_VALIDATION ${EXTRA_VALIDATION})

  # Start from an empty file, then append every listed .sql file in order.
  file(WRITE "${ALL_SQL_IN_FILENAME}" "")
  include(sql_filelist.cmake)
  foreach(SQL_FILE ${SQL_FILES})
    # Quoted so that an unusual path is always passed as a single argument.
    cat("${SQL_FILE}" "${ALL_SQL_IN_FILENAME}")
  endforeach()

  # Compute the MD5 before PROJ_VERSION substitution to avoid updating the
  # expected MD5 if we just bump the PROJ_VERSION.
  # The copy is made with UNIX newlines so that the checksum does not depend
  # on the line endings of the checkout.
  # NOTE(review): configure_file() without @ONLY/COPYONLY also expands
  # ${VAR} and @VAR@ references, so ${PROJ_VERSION} looks like it is already
  # substituted in the .tmp copy - confirm the MD5 is really version-independent.
  configure_file("${ALL_SQL_IN_FILENAME}" "${ALL_SQL_IN_FILENAME}.tmp" NEWLINE_STYLE UNIX)
  file(MD5 "${ALL_SQL_IN_FILENAME}.tmp" OUT_MD5_LOCAL)
  set(${OUT_MD5} "${OUT_MD5_LOCAL}" PARENT_SCOPE)

  # Do ${PROJ_VERSION} substitution (literal text replacement, in place)
  file(READ "${ALL_SQL_IN_FILENAME}" CONTENTS)
  string(REPLACE "\${PROJ_VERSION}" "${PROJ_VERSION}" CONTENTS_MOD "${CONTENTS}")
  file(WRITE "${ALL_SQL_IN_FILENAME}" "${CONTENTS_MOD}")
endfunction()

# Build the regular all.sql.in (extra validation OFF) and get its checksum.
generate_all_sql_in("${ALL_SQL_IN}" OFF PROJ_DB_SQL_MD5)

# When the checksum differs from the expected one passed in by the caller,
# rebuild a throw-away database with extra validation enabled. Note that both
# outcomes below stop the script with FATAL_ERROR: either the validation build
# failed, or it succeeded and the expected MD5 must be updated by hand.
if (NOT "${PROJ_DB_SQL_MD5}" STREQUAL "${PROJ_DB_SQL_EXPECTED_MD5}")
message(WARNING "all.sql.in content has changed. Running extra validation checks when building proj.db...")

# Separate SQL input generated with extra validation ON.
set(ALL_SQL_IN_EXTRA_VALIDATION "${ALL_SQL_IN}.extra_validation")
generate_all_sql_in("${ALL_SQL_IN_EXTRA_VALIDATION}" ON PROJ_DB_SQL_EXTRA_VALIDATION_MD5)

# Separate database file, removed first so sqlite3 starts from scratch.
set(PROJ_DB_EXTRA_VALIDATION_FILENAME "${PROJ_DB}.extra_validation")
file(REMOVE "${PROJ_DB_EXTRA_VALIDATION_FILENAME}")
# Feed the validation SQL to sqlite3 on its standard input.
execute_process(COMMAND "${EXE_SQLITE3}" "${PROJ_DB_EXTRA_VALIDATION_FILENAME}"
INPUT_FILE "${ALL_SQL_IN_EXTRA_VALIDATION}"
RESULT_VARIABLE STATUS)
# STATUS holds the process result; anything set and different from 0 is a failure.
if(STATUS AND NOT STATUS EQUAL 0)
message(FATAL_ERROR "Build of proj.db from ${ALL_SQL_IN_EXTRA_VALIDATION} failed")
else()
# Validation passed: ask the developer to record the new checksum.
message(FATAL_ERROR "Update 'set(PROJ_DB_SQL_EXPECTED_MD5 ...)' line in data/CMakeLists.txt with ${PROJ_DB_SQL_MD5} value")
endif()
endif()

if(IS_DIRECTORY ${PROJ_DB_CACHE_DIR})
set(USE_PROJ_DB_CACHE_DIR TRUE)
Expand Down
3 changes: 3 additions & 0 deletions data/sql/analyze_vacuum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Collect the table/index statistics used by the SQLite query planner.
ANALYZE;

-- Rebuild the database file so that it is repacked compactly.
VACUUM;
178 changes: 0 additions & 178 deletions data/sql/commit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,181 +11,3 @@ CREATE INDEX concatenated_operation_idx ON concatenated_operation(source_crs_aut

-- We don't need to select by auth_name, code so nullify them to save space
UPDATE usage SET auth_name = NULL, code = NULL;

-- Final consistency checks
CREATE TABLE dummy(foo);
CREATE TRIGGER final_checks
BEFORE INSERT ON dummy
FOR EACH ROW BEGIN

-- check that view definitions have no error
SELECT RAISE(ABORT, 'corrupt definition of coordinate_operation_view')
WHERE (SELECT 1 FROM coordinate_operation_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of crs_view')
WHERE (SELECT 1 FROM crs_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of object_view')
WHERE (SELECT 1 FROM object_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of authority_list')
WHERE (SELECT 1 FROM authority_list LIMIT 1) = 0;

-- check that the auth_name of all objects in object_view is recorded in builtin_authorities
SELECT RAISE(ABORT, 'One or several authorities referenced in object_view are missing in builtin_authorities')
WHERE EXISTS (
SELECT DISTINCT o.auth_name FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM builtin_authorities b WHERE o.auth_name = b.auth_name)
);

-- check that a usage is registered for most objects where this is needed
SELECT RAISE(ABORT, 'One or several objects lack a corresponding record in the usage table')
WHERE EXISTS (
SELECT * FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM usage u WHERE
o.table_name = u.object_table_name AND
o.auth_name = u.object_auth_name AND
o.code = u.object_code)
AND o.table_name NOT IN ('unit_of_measure', 'axis',
'celestial_body', 'ellipsoid', 'prime_meridian', 'extent')
-- the IGNF registry lacks extent for the following objects
AND NOT (o.auth_name = 'IGNF' AND o.table_name IN ('geodetic_datum', 'vertical_datum', 'conversion'))
);

SELECT RAISE(ABORT, 'Geodetic datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM geodetic_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM geodetic_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

SELECT RAISE(ABORT, 'Vertical datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM vertical_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM vertical_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

SELECT RAISE(ABORT, 'PROJ defines an alias that exists in EPSG')
WHERE EXISTS (
SELECT * FROM (
SELECT count(*) AS count, table_name, auth_name, code, alt_name FROM alias_name
WHERE source in ('EPSG', 'PROJ')
AND NOT (source = 'PROJ' AND alt_name IN ('GGRS87', 'NAD27', 'NAD83'))
GROUP BY table_name, auth_name, code, alt_name) x WHERE count > 1
);

-- test to check that our custom grid transformation overrides are really needed
SELECT RAISE(ABORT, 'PROJ grid_transformation defined whereas EPSG has one')
WHERE EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.source_crs_auth_name
AND g1.source_crs_code = g2.source_crs_code
AND g1.target_crs_auth_name = g2.target_crs_auth_name
AND g1.target_crs_code = g2.target_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0 AND (
(g1.interpolation_crs_auth_name IS NULL AND g2.interpolation_crs_auth_name IS NULL) OR
(g1.interpolation_crs_auth_name IS NOT NULL AND g2.interpolation_crs_auth_name IS NOT NULL AND
g1.interpolation_crs_auth_name = g2.interpolation_crs_auth_name AND
g1.interpolation_crs_code = g2.interpolation_crs_code)))
OR EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.target_crs_auth_name
AND g1.source_crs_code = g2.target_crs_code
AND g1.target_crs_auth_name = g1.source_crs_auth_name
AND g1.target_crs_code = g1.source_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0);

SELECT RAISE(ABORT, 'Arg! there is now a EPSG:102100 object. Hack in createFromUserInput() will no longer work')
WHERE EXISTS(SELECT 1 FROM crs_view WHERE auth_name = 'EPSG' AND code = '102100');

-- check coordinate_operation_view "foreign keys"
SELECT RAISE(ABORT, 'One coordinate_operation has a broken source_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.source_crs_auth_name || cov.source_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));
SELECT RAISE(ABORT, 'One coordinate_operation has a broken target_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.target_crs_auth_name || cov.target_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));

-- check that transformations intersect the area of use of their source/target CRS
-- EPSG, ESRI and IGNF have cases where this does not hold.
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its source CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
(v.table_name = 'grid_transformation' OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.source_crs_auth_name = c.auth_name AND
v.source_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its target CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
((v.table_name = 'grid_transformation' AND NOT (v.auth_name = 'IGNF' AND v.code = 'TSG1185'))
OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.target_crs_auth_name = c.auth_name AND
v.target_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );

-- check geoid_model table
SELECT RAISE(ABORT, 'missing GEOID99 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID99');
SELECT RAISE(ABORT, 'missing GEOID03 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID03');
SELECT RAISE(ABORT, 'missing GEOID06 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID06');
SELECT RAISE(ABORT, 'missing GEOID09 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID09');
SELECT RAISE(ABORT, 'missing GEOID12A in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12A');
SELECT RAISE(ABORT, 'missing GEOID12B in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12B');
SELECT RAISE(ABORT, 'missing GEOID18 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID18');

-- check presence of au_ga_AUSGeoid98.tif
SELECT RAISE(ABORT, 'missing au_ga_AUSGeoid98.tif')
WHERE NOT EXISTS(SELECT 1 FROM grid_alternatives WHERE proj_grid_name = 'au_ga_AUSGeoid98.tif');

-- check PROJ.VERSION value
SELECT RAISE(ABORT, 'Value of PROJ.VERSION entry of metadata tables not substituted by actual value')
WHERE (SELECT 1 FROM metadata WHERE key = 'PROJ.VERSION' AND value LIKE '$%');

-- Only available in sqlite >= 3.16. May be activated as soon as support for ubuntu 16 is dropped
-- check all foreign key constraints have an 'ON DELETE CASCADE'
-- SELECT RAISE(ABORT, 'FK constraint with missing "ON DELETE CASCADE"')
-- WHERE EXISTS (SELECT 1 FROM
-- pragma_foreign_key_list(name),
-- (SELECT name from sqlite_master WHERE type='table')
-- WHERE upper(on_delete) != 'CASCADE');


END;
INSERT INTO dummy DEFAULT VALUES;
DROP TRIGGER final_checks;
DROP TABLE dummy;

ANALYZE;

VACUUM;
9 changes: 9 additions & 0 deletions data/sql/conversion_triggers_hand_written.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

-- Validates rows inserted into "conversion" that use the Orthographic method
-- (method EPSG:9840). Rows with deprecated = 1 are exempt.
-- The insert is aborted when any of the following does not hold:
--   * method_name is 'Orthographic'
--   * param1 is EPSG:8801 'Latitude of natural origin', with a value and an
--     'angle' unit
--   * param2 is EPSG:8802 'Longitude of natural origin', with a value and an
--     'angle' unit
--   * param3 is EPSG:8806 'False easting', with a value and a 'length' unit
--   * param4 is EPSG:8807 'False northing', with a value and a 'length' unit
--   * every param5, param6 and param7 column is NULL
-- NOTE(review): the name/code tests use '!=', which yields NULL (not true) for
-- a NULL column, so a NULL auth_name/code/name alone would not raise here -
-- confirm that this is covered elsewhere.
CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
9 changes: 0 additions & 9 deletions data/sql/customizations_early.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@ INSERT INTO "scope" VALUES('PROJ','SCOPE_UNKNOWN','Not known.',0);
INSERT INTO celestial_body VALUES('PROJ', 'EARTH', 'Earth', 6378137.0);

INSERT INTO versioned_auth_name_mapping VALUES ('IAU_2015', 'IAU', '2015', 1);

CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
Loading

0 comments on commit 98974da

Please sign in to comment.