Skip to content

Commit

Permalink
Add a PROJ_DB_FAST_BUILD=ON/OFF CMake option (default OFF)
Browse files Browse the repository at this point in the history
"Trigger" for this (pun intended) is that most of the time spent while
building GDAL Docker image when cross-building to arm64 is spent on building
proj.db (close to 7.5h for a target Ubuntu 24.04 arm64 !). Setting this
new option should cut that to a few minutes.

```

.. option:: PROJ_DB_FAST_BUILD=OFF

    .. versionadded:: 9.5.1

    By default, creation of :file:`proj.db` involves inserting consistency check
    triggers before inserting data records, to be able to catch potential
    inconsistencies. Such checks are useful for core PROJ developers when they
    update the database content, or for advanced PROJ users that customize the
    content of the database. However those checks come with a non-negligible cost.
    On modern hardware, building :file:`proj.db` with those checks enabled takes
    about 50 to 60 seconds (and on scenarios where PROJ is built for other
    architectures with full emulation, several hours). When setting this option
    to ON, those triggers are inserted after data records, which decreases the
    build time to about 3 seconds.
    In short, setting this option to ON is safe if you do not customize
    the .sql files used to build :file:`proj.db` yourself.
```

Timings on my machine:

- before this PR:

```
$ time make generate_proj_db
[100%] Generating proj.db
[100%] Built target generate_proj_db

real	0m54,752s
user	0m53,968s
sys	0m0,648s

$ md5sum data/proj.db
beecdc018b4a5131229709b3c7747036  data/proj.db

$ echo ".dump" | sqlite3 data/proj.db | md5sum
64e446efdc5c18e398cc7b6b2e4b3086  -
```

- with this PR, not setting PROJ_DB_FAST_BUILD (so OFF):

Same as above

- with this PR, setting PROJ_DB_FAST_BUILD=ON

```
$ cmake .. -DPROJ_DB_FAST_BUILD=ON

$ time make generate_proj_db
[100%] Generating proj.db
[100%] Built target generate_proj_db

real	0m3,243s
user	0m2,876s
sys	0m0,204s

$ md5sum data/proj.db
1955dfdc3f7abada3890bf9b7592770a  data/proj.db

$ echo ".dump" | sqlite3 data/proj.db | md5sum
64e446efdc5c18e398cc7b6b2e4b3086  -
```

One can notice that the binary content of proj.db is not exactly the
same, however the result of dumping it to SQL is exactly the same. The
reason for the slight difference is that in PROJ_DB_FAST_BUILD=ON we
also skip creating a fake table and trigger, which influences the
"schema version number" of the SQLite3 database, which is a non
significant difference.

Cf the diff of the ``od -x`` output, which shows that only a few bytes
in the SQLite3 header are different.

```
$ diff -u proj.db.slow.txt proj.db.fast.txt
--- proj.db.slow.txt	2024-10-16 08:50:07.211601573 +0200
+++ proj.db.fast.txt	2024-10-16 08:50:16.155615860 +0200
@@ -1,9 +1,9 @@
 0000000 5153 694c 6574 6620 726f 616d 2074 0033
-0000020 0010 0101 4000 2020 0000 1100 0000 d208
-0000040 0000 0000 0000 0000 0000 6700 0000 0400
+0000020 0010 0101 4000 2020 0000 2500 0000 d208
+0000040 0000 0000 0000 0000 0000 6300 0000 0400
 0000060 0000 0000 0000 0000 0000 0100 0000 0000
 0000100 0000 0000 0000 0000 0000 0000 0000 0000
-0000120 0000 0000 0000 0000 0000 0000 0000 1100
+0000120 0000 0000 0000 0000 0000 0000 0000 2500
 0000140 2e00 d93f 0005 0000 0f1a 007e 0000 d208
 0000160 fb0f f60f f10f ec0f e70f e20f dd0f d80f
 0000200 d30f ce0f c90f c40f bf0f ba0f b50f b00f
```
  • Loading branch information
rouault committed Oct 16, 2024
1 parent 54a7662 commit 97c9547
Show file tree
Hide file tree
Showing 10 changed files with 232 additions and 192 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/clang_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ jobs:
id: regular
PROJ_CMAKE_BUILD_OPTIONS: ""

- name: Without TIFF
- name: Without TIFF, with PROJ_DB_FAST_BUILD=ON
id: without_tiff
PROJ_CMAKE_BUILD_OPTIONS: "-DENABLE_TIFF=OFF"
PROJ_CMAKE_BUILD_OPTIONS: "-DENABLE_TIFF=OFF -DPROJ_DB_FAST_BUILD=ON"

if: "!contains(github.event.head_commit.message, '[ci skip]') && !contains(github.event.head_commit.message, '[skip ci]')"
steps:
Expand Down
2 changes: 1 addition & 1 deletion data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ include(sql_filelist.cmake)
add_custom_command(
OUTPUT ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} -E remove -f ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}"
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}" "-DPROJ_DB_FAST_BUILD=${PROJ_DB_FAST_BUILD}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
COMMAND ${CMAKE_COMMAND} -E copy ${PROJ_DB} ${CMAKE_CURRENT_BINARY_DIR}/for_tests
DEPENDS ${SQL_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
Expand Down
3 changes: 3 additions & 0 deletions data/sql/analyze_vacuum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ANALYZE;

VACUUM;
178 changes: 0 additions & 178 deletions data/sql/commit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,181 +11,3 @@ CREATE INDEX concatenated_operation_idx ON concatenated_operation(source_crs_aut

-- We don't need to select by auth_name, code so nullify them to save space
UPDATE usage SET auth_name = NULL, code = NULL;

-- Final consistency checks
-- Scaffolding for the final checks: a throw-away table `dummy` receives a
-- BEFORE INSERT trigger whose body is a list of
-- "SELECT RAISE(ABORT, ...) WHERE <condition>" statements. A single INSERT
-- into `dummy` (after END below) then evaluates every check; the first
-- check whose condition holds aborts with the given message.
CREATE TABLE dummy(foo);
CREATE TRIGGER final_checks
BEFORE INSERT ON dummy
FOR EACH ROW BEGIN

-- check that view definitions have no error
-- (the "= 0" comparison cannot be true, since the subquery yields 1 or NULL;
-- presumably the point is only to make SQLite evaluate each view)
SELECT RAISE(ABORT, 'corrupt definition of coordinate_operation_view')
WHERE (SELECT 1 FROM coordinate_operation_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of crs_view')
WHERE (SELECT 1 FROM crs_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of object_view')
WHERE (SELECT 1 FROM object_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of authority_list')
WHERE (SELECT 1 FROM authority_list LIMIT 1) = 0;

-- check that the auth_name of all objects in object_view is recorded in builtin_authorities
SELECT RAISE(ABORT, 'One or several authorities referenced in object_view are missing in builtin_authorities')
WHERE EXISTS (
SELECT DISTINCT o.auth_name FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM builtin_authorities b WHERE o.auth_name = b.auth_name)
);

-- check that a usage is registered for most objects where this is needed
-- (the table names listed in the NOT IN clause below are exempted)
SELECT RAISE(ABORT, 'One or several objects lack a corresponding record in the usage table')
WHERE EXISTS (
SELECT * FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM usage u WHERE
o.table_name = u.object_table_name AND
o.auth_name = u.object_auth_name AND
o.code = u.object_code)
AND o.table_name NOT IN ('unit_of_measure', 'axis',
'celestial_body', 'ellipsoid', 'prime_meridian', 'extent')
-- the IGNF registry lacks extent for the following objects
AND NOT (o.auth_name = 'IGNF' AND o.table_name IN ('geodetic_datum', 'vertical_datum', 'conversion'))
);

-- check that every geodetic datum with a non-NULL ensemble_accuracy has at
-- least one row in geodetic_datum_ensemble_member
SELECT RAISE(ABORT, 'Geodetic datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM geodetic_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM geodetic_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

-- same check for vertical datums and vertical_datum_ensemble_member
SELECT RAISE(ABORT, 'Vertical datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM vertical_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM vertical_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

-- check that no (table_name, auth_name, code, alt_name) combination appears
-- more than once among alias_name rows whose source is EPSG or PROJ
-- (PROJ-sourced aliases GGRS87, NAD27 and NAD83 are left out of the count)
SELECT RAISE(ABORT, 'PROJ defines an alias that exists in EPSG')
WHERE EXISTS (
SELECT * FROM (
SELECT count(*) AS count, table_name, auth_name, code, alt_name FROM alias_name
WHERE source in ('EPSG', 'PROJ')
AND NOT (source = 'PROJ' AND alt_name IN ('GGRS87', 'NAD27', 'NAD83'))
GROUP BY table_name, auth_name, code, alt_name) x WHERE count > 1
);

-- test to check that our custom grid transformation overrides are really needed
SELECT RAISE(ABORT, 'PROJ grid_transformation defined whereas EPSG has one')
WHERE EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.source_crs_auth_name
AND g1.source_crs_code = g2.source_crs_code
AND g1.target_crs_auth_name = g2.target_crs_auth_name
AND g1.target_crs_code = g2.target_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0 AND (
(g1.interpolation_crs_auth_name IS NULL AND g2.interpolation_crs_auth_name IS NULL) OR
(g1.interpolation_crs_auth_name IS NOT NULL AND g2.interpolation_crs_auth_name IS NOT NULL AND
g1.interpolation_crs_auth_name = g2.interpolation_crs_auth_name AND
g1.interpolation_crs_code = g2.interpolation_crs_code)))
OR EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.target_crs_auth_name
AND g1.source_crs_code = g2.target_crs_code
-- NOTE(review): the next two conditions compare g1's target CRS with g1's
-- own source CRS; by symmetry with the two lines above, g2.source_crs_*
-- may have been intended (reverse-direction match) -- confirm before changing.
AND g1.target_crs_auth_name = g1.source_crs_auth_name
AND g1.target_crs_code = g1.source_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0);

SELECT RAISE(ABORT, 'Arg! there is now a EPSG:102100 object. Hack in createFromUserInput() will no longer work')
WHERE EXISTS(SELECT 1 FROM crs_view WHERE auth_name = 'EPSG' AND code = '102100');

-- check coordinate_operation_view "foreign keys"
-- (the link is tested on the concatenation auth_name || code against crs_view)
SELECT RAISE(ABORT, 'One coordinate_operation has a broken source_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.source_crs_auth_name || cov.source_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));
SELECT RAISE(ABORT, 'One coordinate_operation has a broken target_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.target_crs_auth_name || cov.target_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));

-- check that transformations intersect the area of use of their source/target CRS
-- EPSG, ESRI and IGNF have cases where this does not hold.
-- (v/vu/ve: the operation, its usage row and its extent; c/cu/ce: the same for the CRS)
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its source CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
(v.table_name = 'grid_transformation' OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.source_crs_auth_name = c.auth_name AND
v.source_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );
-- same check against the target CRS; IGNF:TSG1185 is explicitly exempted
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its target CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
((v.table_name = 'grid_transformation' AND NOT (v.auth_name = 'IGNF' AND v.code = 'TSG1185'))
OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.target_crs_auth_name = c.auth_name AND
v.target_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );

-- check geoid_model table
-- (each of the following names must be present at least once)
SELECT RAISE(ABORT, 'missing GEOID99 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID99');
SELECT RAISE(ABORT, 'missing GEOID03 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID03');
SELECT RAISE(ABORT, 'missing GEOID06 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID06');
SELECT RAISE(ABORT, 'missing GEOID09 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID09');
SELECT RAISE(ABORT, 'missing GEOID12A in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12A');
SELECT RAISE(ABORT, 'missing GEOID12B in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12B');
SELECT RAISE(ABORT, 'missing GEOID18 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID18');

-- check presence of au_ga_AUSGeoid98.tif
SELECT RAISE(ABORT, 'missing au_ga_AUSGeoid98.tif')
WHERE NOT EXISTS(SELECT 1 FROM grid_alternatives WHERE proj_grid_name = 'au_ga_AUSGeoid98.tif');

-- check PROJ.VERSION value
-- (aborts when the stored value still starts with '$')
SELECT RAISE(ABORT, 'Value of PROJ.VERSION entry of metadata tables not substituted by actual value')
WHERE (SELECT 1 FROM metadata WHERE key = 'PROJ.VERSION' AND value LIKE '$%');

-- Only available in sqlite >= 3.16. May be activated as soon as support for ubuntu 16 is dropped
-- check all foreign key constraints have an 'ON DELETE CASCADE'
-- SELECT RAISE(ABORT, 'FK constraint with missing "ON DELETE CASCADE"')
-- WHERE EXISTS (SELECT 1 FROM
-- pragma_foreign_key_list(name),
-- (SELECT name from sqlite_master WHERE type='table')
-- WHERE upper(on_delete) != 'CASCADE');


END;
-- Fire the trigger once so that every check above runs, then remove the
-- temporary trigger and table again.
INSERT INTO dummy DEFAULT VALUES;
DROP TRIGGER final_checks;
DROP TABLE dummy;

ANALYZE;

VACUUM;
9 changes: 9 additions & 0 deletions data/sql/conversion_triggers_hand_written.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

-- Validation trigger for inserts into `conversion`: aborts the insert when a
-- record with deprecated != 1 that uses EPSG method 9840 does not match the
-- expected Orthographic layout:
--   * method_name is 'Orthographic';
--   * params 1 and 2 are EPSG 8801 / 8802 with the expected names, a non-NULL
--     value and a unit whose type in unit_of_measure is 'angle';
--   * params 3 and 4 are EPSG 8806 / 8807 with the expected names, a non-NULL
--     value and a unit whose type is 'length';
--   * every column of params 5, 6 and 7 is NULL.
-- NOTE(review): SQLite only accepts INSTEAD OF triggers on views, so
-- `conversion` is presumably a view here -- confirm against the schema.
CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
9 changes: 0 additions & 9 deletions data/sql/customizations_early.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@ INSERT INTO "scope" VALUES('PROJ','SCOPE_UNKNOWN','Not known.',0);
INSERT INTO celestial_body VALUES('PROJ', 'EARTH', 'Earth', 6378137.0);

INSERT INTO versioned_auth_name_mapping VALUES ('IAU_2015', 'IAU', '2015', 1);

-- Validation trigger for inserts into `conversion`: aborts the insert when a
-- record with deprecated != 1 that uses EPSG method 9840 does not match the
-- expected Orthographic layout:
--   * method_name is 'Orthographic';
--   * params 1 and 2 are EPSG 8801 / 8802 with the expected names, a non-NULL
--     value and a unit whose type in unit_of_measure is 'angle';
--   * params 3 and 4 are EPSG 8806 / 8807 with the expected names, a non-NULL
--     value and a unit whose type is 'length';
--   * every column of params 5, 6 and 7 is NULL.
-- NOTE(review): SQLite only accepts INSTEAD OF triggers on views, so
-- `conversion` is presumably a view here -- confirm against the schema.
CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
Loading

0 comments on commit 97c9547

Please sign in to comment.