Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

emergency services deduplication code #13

Open
wants to merge 12 commits into
base: flooding
Choose a base branch
from
71 changes: 71 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,77 @@ calculate_composite_score <-
return(data)
}

#' Takes in Open Street Map data returned from the osmdata library and applies following steps:
#' 1. Retains any multiploygons
#' 2. Retains any polygons which do not intersect with the multipolygons
#' 3. Retains any points which aren't in steps 1 or 2
#'
#' This process aims to deal with duplication of points relating to the same building.
#'
#' @param osm_data osmdata object osmdata in sf format (i.e. output from osmdata::osmdata_sf())

osm_data_reduce <-
function(osm_data) {

points <- osm_data$osm_points |>
select(osm_id_p = osm_id, name_p = name)

polygons <- osm_data$osm_polygons |>
select(osm_id_poly = osm_id, name_poly = name)

multipolygons <- osm_data$osm_multipolygons |>
select(osm_id_mpoly = osm_id, name_mpoly = name)

# Check if error on joins
tryCatch(
{
polygons |>
st_join(multipolygons)
},
error = function(e) {
message("There is a joining error, you may need to turn off s2 processing using sf::sf_use_s2(FALSE)")
}
)
Comment on lines +344 to +353
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job on the defensive programming style 😃


# Retain polygons not intersecting with multipolygons
if (nrow(multipolygons) != 0) {
polyons_not_multipolygon_overlap <- polygons |>
st_join(multipolygons) |>
filter(is.na(osm_id_mpoly)) |>
distinct(osm_id_poly)

polygons_to_keep <- polygons |>
inner_join(polyons_not_multipolygon_overlap, by = "osm_id_poly") |>
rename(osm_id = osm_id_poly, name = name_poly)

polys_multipolys <- multipolygons |>
rename(osm_id = osm_id_mpoly, name = name_mpoly) |>
bind_rows(polygons_to_keep)
} else {
polys_multipolys <- polygons |>
rename(osm_id = osm_id_poly, name = name_poly)
}

# Keep points not already covered in a multipolygon or kept polygon
if (nrow(polygons) != 0) {
points_not_polygon_multipolygon_overlap <- points |>
st_join(polys_multipolys) |>
Comment on lines +388 to +389
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could some defensive programming for this join be introduced to help potential debugging?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in f030d5b.

filter(is.na(osm_id)) |>
distinct(osm_id_p)

points_to_keep <- points |>
inner_join(points_not_polygon_multipolygon_overlap, by = "osm_id_p")
} else {
points_to_keep <- points
}

combined <- points_to_keep |>
rename(osm_id = osm_id_p, name = name_p) |>
bind_rows(polys_multipolys)

return(combined)
}

# ---- Themes ----
theme_map <-
function(...) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ vulnerability_domain_scores <-
num_quantiles = 10
)


write_csv(
vulnerability_domain_scores,
"data/vulnerability/disasters-emergencies/flooding/2022-interim/england/flood-vulnerability-index.csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ library(readr)
library(osmdata)
library(sf)
library(geographr)
library(stringr)
library(ggplot2)
library(tidyr)

source("R/utils.R") # for osm_data_reduce()

flood_risk_oa <- read_rds("data/vulnerability/disasters-emergencies/flooding/flood-risk-output-areas.rds")

Expand All @@ -12,92 +17,194 @@ flood_risk_oa <- read_rds("data/vulnerability/disasters-emergencies/flooding/flo
# boundaries_oa <- read_sf("https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_December_2011_Boundaries_EW_BFE/FeatureServer/0/query?where=1%3D1&outFields=OA11CD&outSR=4326&f=json")
boundaries_oa <- read_sf("data/vulnerability/disasters-emergencies/flooding/Output_Areas__December_2011__Boundaries_EW_BFE.shp")

boundaries_oa <- st_transform(boundaries_oa, crs = 4326)
boundaries_oa_eng <- boundaries_oa |>
filter(str_detect(OA11CD, "^E"))

# ---- Get map points for each type of emergency service ----
# Bounding box for England
england_bb <- getbb("England")

fire <-
opq(england_bb, timeout = 1000) %>%
add_osm_feature("amenity", "fire_station") %>%
fire <-
opq(england_bb, timeout = 1000) |>
add_osm_feature("amenity", "fire_station") |>
osmdata_sf()

police <-
opq(england_bb, timeout = 1000) %>%
add_osm_feature("amenity", "police") %>%
police <-
opq(england_bb, timeout = 1000) |>
add_osm_feature("amenity", "police") |>
osmdata_sf()

ambo <-
opq(england_bb, timeout = 1000) %>%
add_osm_feature("emergency", "ambulance_station") %>%
ambo <-
opq(england_bb, timeout = 1000) |>
add_osm_feature("emergency", "ambulance_station") |>
osmdata_sf()

emergency_services <-
bind_rows(
fire$osm_points,
police$osm_points,
ambo$osm_points
) %>%
select(osm_id, name)

emergency_services <- st_transform(emergency_services, crs = st_crs(boundaries_oa))

# ---- Get Output Areas for each emergency service ----
st_crs(emergency_services) <- 4326 # Need to do this to make the next block of code work - see https://stackoverflow.com/a/62268361

emergency_services_oa <-
emergency_services %>%
st_join(boundaries_oa)

emergency_services_oa <-
emergency_services_oa %>%
st_drop_geometry() %>%
as_tibble() %>%
count(oa_code = OA11CD)

# emergency_services_oa %>%
# write_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/flooding-exposure-emergency-services-oa.rds")
#
# emergency_services_oa <-
# read_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/flooding-exposure-emergency-services-oa.rds")

# Which OAs are in flood risk areas?
emergency_services_flood_risk_oa <-
emergency_services_oa %>%
left_join(flood_risk_oa)
# Use function to reduce duplication of points relating to the same building ----
sf::sf_use_s2(FALSE) # turn off s2 as otherwise get joining errors about spherical geometries
fire_reduce <- osm_data_reduce(fire)
ambo_reduce <- osm_data_reduce(ambo)
police_reduce <- osm_data_reduce(police)

# Issue in dplyr where can't use id argument when binding sf objects?
# https://github.com/tidyverse/dplyr/issues/5780
services <- bind_rows(
fire_reduce,
ambo_reduce,
police_reduce
) |>
mutate(service = case_when(
osm_id %in% police_reduce$osm_id & osm_id %in% fire_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_fire_police",
osm_id %in% fire_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_fire",
osm_id %in% police_reduce$osm_id & osm_id %in% ambo_reduce$osm_id ~ "ambo_police",
osm_id %in% police_reduce$osm_id & osm_id %in% fire_reduce$osm_id ~ "fire_police",
osm_id %in% fire_reduce$osm_id ~ "fire",
osm_id %in% ambo_reduce$osm_id ~ "ambo",
osm_id %in% police_reduce$osm_id ~ "police",
TRUE ~ NA_character_
))

# Check service counts
services |>
st_drop_geometry() |>
group_by(service) |>
summarise(count = n())
# ambo: 575, ambo_fire: 12, fire: 1920, police: 2001

services |>
st_drop_geometry() |>
group_by(osm_id) |>
mutate(count_id = n()) |>
filter(count_id > 1)

# Check count of the geometry types
services |>
mutate(area = st_area(geometry)) |>
st_drop_geometry() |>
mutate(point_poly = if_else(area == units::as_units(0, value = "m^2"), "point", "poly")) |>
group_by(point_poly) |>
summarise(count = n())

# Keep only those within England boundary ----
# Not this will cause duplicated polygon/multipolygon rows where it crosses multiple OAs
services_eng <- services |>
st_transform(crs = st_crs(boundaries_oa_eng)) |>
st_join(boundaries_oa_eng, left = FALSE)

services_eng |>
st_drop_geometry() |>
group_by(service) |>
summarise(count = n_distinct(osm_id))
# ambo: 502, ambo_fire: 1, fire: 1520, police: 1462

# Checking those with no OA match ----------------
# Are in Ireland, Scot, Wales & France (picked up do to boundary box in line 21)
services |>
st_transform(crs = st_crs(boundaries_oa_eng)) |>
st_join(boundaries_oa_eng) |>
mutate(no_oa_match = if_else(is.na(OA11CD), T, F)) |>
ggplot() +
geom_sf(aes(colour = no_oa_match))

# Checking reasonableness of total numbers of each service -----
# Source: https://www.gov.uk/government/statistical-data-sets/fire-statistics-data-tables
# Fire: 'FIRE1403: Fire stations and appliances, by fire and rescue authority tab'
# In 2020 1,393 Fire Stations in England

# https://www.ukcrimestats.com/Police_Stations/
# 355 Police stations

# https://www.londonambulance.nhs.uk/talking-with-us/how-to-find-us/
# 70 Ambulance stations in London

# Check when multiple buildings of same type per OA
# Assumption: unlikely will be building for same service with same OA so assume is duplicate
services_eng_dups <- services_eng |>
group_by(OA11CD, service) |>
mutate(count_id = n()) |>
filter(count_id > 1) |>
arrange(desc(count_id), OA11CD, name) |>
st_transform(crs = 4326)

# Take top one where has name (if not null for all) and then the largest size
services_eng_dedup <- services_eng_dups |>
mutate(size = st_area(geometry)) |>
group_by(OA11CD, service) |>
arrange(OA11CD, name, desc(size)) |>
slice(1)

services_eng_dedup <- services_eng |>
filter(!osm_id %in% services_eng_dups$osm_id) |>
bind_rows(services_eng_dedup)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this method should also be bundled into a separate R function?

Are we safe in the assumption that it is unlikely that the same building will be used for the same service in the same OA?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intuition is that this assumption is sound, and is probably something we want to generically apply to all of the outputs from the osm_data_reduce() function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah suppose difficult to validate the assumption and there may be exceptions where is more than 1 of a service in a single OA but thinking was since there are about 171k OAs in England and if take fire (which has more than police or ambulance) there are 1,400 stations of these so if well distributed in theory unlikely to be crossover (and OAs are about ~125 households and have a population of ~300). Suppose balance of this assumption, which may have some exceptions, with the duplicated cases within the OSM data which would need manually checking perhaps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have put this deduplication part into a separate function osm_oa_deduping() in case want to use logic inosm_data_reduce() and osm_oa_deduping() separately. Has been updated in 558a30a.


services_eng_dedup |>
st_drop_geometry() |>
group_by(service) |>
summarise(count = n_distinct(osm_id))
# ambo: 463, ambo_fire: 1, fire: 1410, police: 1317


# Dealing with buildings which span across multiple OAs ------
buildings_multiple_oas <- services_eng_dedup |>
group_by(osm_id, service) |>
mutate(multiple_oas = n_distinct(OA11CD)) |>
filter(multiple_oas > 1)

# 'largest = TRUE' argument below joins the OA with the largest overlap with the buildings
# https://github.com/r-spatial/sf/issues/578
# Was unable to use this on whole dataset on line 91 due to an error
buildings_multiple_oas_largest_oa <- services |>
st_make_valid() |> # https://github.com/r-spatial/sf/issues/870
filter(osm_id %in% buildings_multiple_oas$osm_id) |>
st_transform(crs = st_crs(boundaries_oa_eng)) |>
st_join(boundaries_oa_eng, left = FALSE, largest = TRUE)

# Combine the outputs
services_final_output <- services_eng_dedup |>
filter(!osm_id %in% buildings_multiple_oas$osm_id) |>
bind_rows(buildings_multiple_oas_largest_oa)

# Check no duplicates
services_final_output |>
group_by(osm_id, service) |>
mutate(count_osm_id = n()) |>
filter(count_osm_id > 1)


# Which OAs are in flood risk areas? ----
emergency_services_flood_risk_oa <-
services_final_output |>
st_drop_geometry() |>
left_join(flood_risk_oa, by = c("OA11CD" = "oa_code")) |>
select(osm_id, service, oa_11_code = OA11CD, flood_risk)

# ---- Aggregate into Local Authorities ----
oa_lad <-
lookup_postcode_oa_lsoa_msoa_lad %>%
distinct(oa_code, lad_code)
oa_lad <-
lookup_postcode_oa_11_lsoa_11_msoa_11_lad_20 |>
distinct(oa_11_code, lad_20_code)

# Calculate proportion of emergency services at risk of flooding in each LA
emergency_services_flood_risk_lad <-
emergency_services_flood_risk_oa %>%
left_join(oa_lad) %>%

group_by(lad_code, flood_risk) %>%
summarise(n = sum(n)) %>%
ungroup() %>%

pivot_wider(names_from = flood_risk, values_from = n) %>%
mutate(proportion_emergency_services_flood_risk = `TRUE` / (`TRUE` + `FALSE`)) %>%

select(lad_code, proportion_emergency_services_flood_risk)
emergency_services_flood_risk_lad <-
emergency_services_flood_risk_oa |>
left_join(oa_lad, by = "oa_11_code") |>
group_by(lad_20_code, flood_risk) |>
summarise(n = n()) |>
ungroup() |>
pivot_wider(names_from = flood_risk, values_from = n) |>
mutate(proportion_emergency_services_flood_risk = `TRUE` / (`TRUE` + `FALSE`)) |>
select(lad_code = lad_20_code, proportion_emergency_services_flood_risk)

# ---- Assign the LA-level proportions to constituent LSOAs ----
emergency_services_flood_risk_lsoa <-
lookup_postcode_oa_lsoa_msoa_lad %>%
distinct(lsoa_code, lad_code) %>%
filter(str_detect(lsoa_code, "^E")) %>%

left_join(emergency_services_flood_risk_lad) %>%
select(-lad_code)
emergency_services_flood_risk_lsoa <-
lookup_postcode_oa_11_lsoa_11_msoa_11_lad_20 |>
distinct(lsoa_11_code, lad_20_code) |>
filter(str_detect(lsoa_11_code, "^E")) |>
left_join(emergency_services_flood_risk_lad, by = c("lad_20_code" = "lad_code")) |>
select(-lad_20_code) |>
rename(lsoa_code = lsoa_11_code)

# Save
emergency_services_flood_risk_lsoa %>%
emergency_services_flood_risk_lsoa |>
write_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/england/community-support/flooding-exposure-emergency-services-lsoa.rds")

emergency_services_flood_risk_lad %>%
emergency_services_flood_risk_lad |>
write_rds("data/vulnerability/disasters-emergencies/flooding/2022-interim/england/community-support/flooding-exposure-emergency-services-lad.rds")
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading