-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
emergency services deduplication code #13
base: flooding
Are you sure you want to change the base?
Changes from 10 commits
697dac2
2057991
a66a9a2
d043cad
959be85
4651cd3
740be8f
2b2cea6
7435a18
0ea78ed
f030d5b
558a30a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,6 +322,77 @@ calculate_composite_score <- | |
return(data) | ||
} | ||
|
||
#' Reduce duplication in Open Street Map data
#'
#' Takes in Open Street Map data returned from the osmdata library and applies
#' the following steps:
#' 1. Retains any multipolygons
#' 2. Retains any polygons which do not intersect with the multipolygons
#' 3. Retains any points which aren't covered by the geometries kept in
#'    steps 1 or 2
#'
#' This process aims to deal with duplication of points relating to the same
#' building.
#'
#' @param osm_data osmdata object in sf format (i.e. output from
#'   osmdata::osmdata_sf())
#'
#' @return An sf object with columns `osm_id` and `name` containing the
#'   retained multipolygons, polygons and points.
osm_data_reduce <-
  function(osm_data) {
    points <- osm_data$osm_points |>
      select(osm_id_p = osm_id, name_p = name)

    polygons <- osm_data$osm_polygons |>
      select(osm_id_poly = osm_id, name_poly = name)

    multipolygons <- osm_data$osm_multipolygons |>
      select(osm_id_mpoly = osm_id, name_mpoly = name)

    # Defensive check: run the polygon/multipolygon join up front so a
    # spherical-geometry (s2) failure surfaces immediately with guidance,
    # rather than part-way through the steps below.
    tryCatch(
      {
        polygons |>
          st_join(multipolygons)
      },
      error = function(e) {
        stop(
          "There is a joining error, you may need to turn off s2 processing using sf::sf_use_s2(FALSE)",
          "\nOriginal error: ", conditionMessage(e),
          call. = FALSE
        )
      }
    )

    # Retain polygons not intersecting with multipolygons
    if (nrow(multipolygons) != 0) {
      # Polygons with no multipolygon match after the spatial join
      polygons_not_multipolygon_overlap <- polygons |>
        st_join(multipolygons) |>
        filter(is.na(osm_id_mpoly)) |>
        distinct(osm_id_poly)

      polygons_to_keep <- polygons |>
        inner_join(polygons_not_multipolygon_overlap, by = "osm_id_poly") |>
        rename(osm_id = osm_id_poly, name = name_poly)

      polys_multipolys <- multipolygons |>
        rename(osm_id = osm_id_mpoly, name = name_mpoly) |>
        bind_rows(polygons_to_keep)
    } else {
      # No multipolygons: every polygon is kept
      polys_multipolys <- polygons |>
        rename(osm_id = osm_id_poly, name = name_poly)
    }

    # Keep points not already covered in a multipolygon or kept polygon
    if (nrow(polygons) != 0) {
      points_not_polygon_multipolygon_overlap <- points |>
        st_join(polys_multipolys) |>
        filter(is.na(osm_id)) |>
        distinct(osm_id_p)

      points_to_keep <- points |>
        inner_join(points_not_polygon_multipolygon_overlap, by = "osm_id_p")
    } else {
      # No polygons or multipolygons: every point is kept
      points_to_keep <- points
    }

    combined <- points_to_keep |>
      rename(osm_id = osm_id_p, name = name_p) |>
      bind_rows(polys_multipolys)

    return(combined)
  }
|
||
# ---- Themes ---- | ||
theme_map <- | ||
function(...) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
library(readr)
library(osmdata)
library(sf)
library(geographr)
library(stringr)
library(ggplot2)
library(tidyr)

source("R/utils.R") # for osm_data_reduce()

flood_risk_oa <- read_rds("data/vulnerability/disasters-emergencies/flooding/flood-risk-output-areas.rds")

# ---- Output Area boundaries ----
# boundaries_oa <- read_sf("https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_December_2011_Boundaries_EW_BFE/FeatureServer/0/query?where=1%3D1&outFields=OA11CD&outSR=4326&f=json")
boundaries_oa <- read_sf("data/vulnerability/disasters-emergencies/flooding/Output_Areas__December_2011__Boundaries_EW_BFE.shp")

boundaries_oa <- st_transform(boundaries_oa, crs = 4326)
# Keep England-only Output Areas (codes starting "E")
boundaries_oa_eng <- boundaries_oa |>
  filter(str_detect(OA11CD, "^E"))

# ---- Get map points for each type of emergency service ----
# Bounding box for England
england_bb <- getbb("England")

fire <-
  opq(england_bb, timeout = 1000) |>
  add_osm_feature("amenity", "fire_station") |>
  osmdata_sf()

police <-
  opq(england_bb, timeout = 1000) |>
  add_osm_feature("amenity", "police") |>
  osmdata_sf()

ambo <-
  opq(england_bb, timeout = 1000) |>
  add_osm_feature("emergency", "ambulance_station") |>
  osmdata_sf()

# Use function to reduce duplication of points relating to the same building ----
sf::sf_use_s2(FALSE) # turn off s2 as otherwise get joining errors about spherical geometries
fire_reduce <- osm_data_reduce(fire)
ambo_reduce <- osm_data_reduce(ambo)
police_reduce <- osm_data_reduce(police)

# Issue in dplyr where can't use id argument when binding sf objects?
# https://github.com/tidyverse/dplyr/issues/5780
# So tag each row with its service(s) via membership tests instead
services <- bind_rows(
  fire_reduce,
  ambo_reduce,
  police_reduce
) |>
  mutate(service = case_when(
    osm_id %in% police_reduce$osm_id & osm_id %in% fire_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_fire_police",
    osm_id %in% fire_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_fire",
    osm_id %in% police_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_police",
    osm_id %in% police_reduce$osm_id & osm_id %in% fire_reduce$osm_id ~ "fire_police",
    osm_id %in% fire_reduce$osm_id ~ "fire",
    osm_id %in% ambo_reduce$osm_id ~ "ambo",
    osm_id %in% police_reduce$osm_id ~ "police",
    TRUE ~ NA_character_
  ))

# Check service counts
services |>
  st_drop_geometry() |>
  group_by(service) |>
  summarise(count = n())
# ambo: 575, ambo_fire: 12, fire: 1920, police: 2001

# Check for osm_ids appearing more than once
services |>
  st_drop_geometry() |>
  group_by(osm_id) |>
  mutate(count_id = n()) |>
  filter(count_id > 1)

# Check count of the geometry types (points have zero area)
services |>
  mutate(area = st_area(geometry)) |>
  st_drop_geometry() |>
  mutate(point_poly = if_else(area == units::as_units(0, value = "m^2"), "point", "poly")) |>
  group_by(point_poly) |>
  summarise(count = n())

# Keep only those within England boundary ----
# Note this will cause duplicated polygon/multipolygon rows where a building crosses multiple OAs
services_eng <- services |>
  st_transform(crs = st_crs(boundaries_oa_eng)) |>
  st_join(boundaries_oa_eng, left = FALSE)

services_eng |>
  st_drop_geometry() |>
  group_by(service) |>
  summarise(count = n_distinct(osm_id))
# ambo: 502, ambo_fire: 1, fire: 1520, police: 1462

# Checking those with no OA match ----------------
# These are in Ireland, Scotland, Wales & France (picked up due to the bounding box used above)
services |>
  st_transform(crs = st_crs(boundaries_oa_eng)) |>
  st_join(boundaries_oa_eng) |>
  mutate(no_oa_match = if_else(is.na(OA11CD), TRUE, FALSE)) |>
  ggplot() +
  geom_sf(aes(colour = no_oa_match))

# Checking reasonableness of total numbers of each service -----
# Source: https://www.gov.uk/government/statistical-data-sets/fire-statistics-data-tables
# Fire: 'FIRE1403: Fire stations and appliances, by fire and rescue authority tab'
# In 2020 1,393 Fire Stations in England

# https://www.ukcrimestats.com/Police_Stations/
# 355 Police stations

# https://www.londonambulance.nhs.uk/talking-with-us/how-to-find-us/
# 70 Ambulance stations in London

# Check when multiple buildings of same type per OA
# Assumption: unlikely will be building for same service with same OA so assume is duplicate
services_eng_dups <- services_eng |>
  group_by(OA11CD, service) |>
  mutate(count_id = n()) |>
  filter(count_id > 1) |>
  arrange(desc(count_id), OA11CD, name) |>
  st_transform(crs = 4326)

# Take top one where has name (if not null for all) and then the largest size
services_eng_dedup <- services_eng_dups |>
  mutate(size = st_area(geometry)) |>
  group_by(OA11CD, service) |>
  arrange(OA11CD, name, desc(size)) |>
  slice(1)

# Recombine the deduplicated rows with the rows that had no within-OA duplicates
services_eng_dedup <- services_eng |>
  filter(!osm_id %in% services_eng_dups$osm_id) |>
  bind_rows(services_eng_dedup)

services_eng_dedup |>
  st_drop_geometry() |>
  group_by(service) |>
  summarise(count = n_distinct(osm_id))
# ambo: 463, ambo_fire: 1, fire: 1410, police: 1317

# Dealing with buildings which span across multiple OAs ------
buildings_multiple_oas <- services_eng_dedup |>
  group_by(osm_id, service) |>
  mutate(multiple_oas = n_distinct(OA11CD)) |>
  filter(multiple_oas > 1)

# 'largest = TRUE' argument below joins the OA with the largest overlap with the buildings
# https://github.com/r-spatial/sf/issues/578
# Was unable to use this on the whole dataset in the England-boundary join above due to an error
buildings_multiple_oas_largest_oa <- services |>
  st_make_valid() |> # https://github.com/r-spatial/sf/issues/870
  filter(osm_id %in% buildings_multiple_oas$osm_id) |>
  st_transform(crs = st_crs(boundaries_oa_eng)) |>
  st_join(boundaries_oa_eng, left = FALSE, largest = TRUE)

# Combine the outputs
services_final_output <- services_eng_dedup |>
  filter(!osm_id %in% buildings_multiple_oas$osm_id) |>
  bind_rows(buildings_multiple_oas_largest_oa)

# Check no duplicates
services_final_output |>
  group_by(osm_id, service) |>
  mutate(count_osm_id = n()) |>
  filter(count_osm_id > 1)

# Which OAs are in flood risk areas? ----
emergency_services_flood_risk_oa <-
  services_final_output |>
  st_drop_geometry() |>
  left_join(flood_risk_oa, by = c("OA11CD" = "oa_code")) |>
  select(osm_id, service, oa_11_code = OA11CD, flood_risk)

# ---- Aggregate into Local Authorities ----
oa_lad <-
  lookup_postcode_oa_11_lsoa_11_msoa_11_lad_20 |>
  distinct(oa_11_code, lad_20_code)

# Calculate proportion of emergency services at risk of flooding in each LA
emergency_services_flood_risk_lad <-
  emergency_services_flood_risk_oa |>
  left_join(oa_lad, by = "oa_11_code") |>
  group_by(lad_20_code, flood_risk) |>
  summarise(n = n()) |>
  ungroup() |>
  pivot_wider(names_from = flood_risk, values_from = n) |>
  mutate(proportion_emergency_services_flood_risk = `TRUE` / (`TRUE` + `FALSE`)) |>
  select(lad_code = lad_20_code, proportion_emergency_services_flood_risk)

# ---- Assign the LA-level proportions to constituent LSOAs ----
emergency_services_flood_risk_lsoa <-
  lookup_postcode_oa_11_lsoa_11_msoa_11_lad_20 |>
  distinct(lsoa_11_code, lad_20_code) |>
  filter(str_detect(lsoa_11_code, "^E")) |>
  left_join(emergency_services_flood_risk_lad, by = c("lad_20_code" = "lad_code")) |>
  select(-lad_20_code) |>
  rename(lsoa_code = lsoa_11_code)

# Save
emergency_services_flood_risk_lsoa |>
  write_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/england/community-support/flooding-exposure-emergency-services-lsoa.rds")

emergency_services_flood_risk_lad |>
  write_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/england/community-support/flooding-exposure-emergency-services-lad.rds")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good job on the defensive programming style 😃