homework_2_file.Rmd

```{r}
# packages
library(tidyverse)
library(here)
library(sf)
library(janitor)
library(tmap)
```

```{r}
# read
# Assessment results table; the literal string "NULL" is treated as missing.
report_path <- here::here(
  "homework_2",
  "Report_Card_Assessment_Data_2018-19_School_Year.csv"
)
report <- read_csv(report_path, na = "NULL")

# County boundary polygons, read as an sf object from the shapefile.
shape_path <- here::here(
  "homework_2",
  "Washington_Counties_with_Natural_Shoreline___washsh_area",
  "Washington_Counties_with_Natural_Shoreline___washsh_area.shp"
)
shape <- st_read(shape_path)
```


# Task

Calculate the average percent of science students (in all grades) per county meeting the required standards.

We have:

* Count met standard
* Count expected to test 
* test subject

Need to filter for science and mutate to work out % that have met 

```{r}
# Tabulate the class of every column in `report` so we can check how
# read_csv() parsed the file. The result is a long table with one row per
# column: `All_variables` (column name) and `Variable_class` (its class).
# across() replaces the superseded summarise_all() scoped verb.
Datatypelist <- report %>%
  summarise(across(everything(), class)) %>%
  pivot_longer(everything(),
               names_to = "All_variables",
               values_to = "Variable_class")

Datatypelist

# Per-county science results: total students meeting the standard, total
# students expected to test, and the percentage of the latter that met it.
county_only <- report %>%
  clean_names() %>%
  select(county, organization_level, test_subject, grade_level,
         count_met_standard, count_of_students_expected_to_test) %>%
  # keep school-level, all-grades science rows; `!=` drops the rows whose
  # county is recorded as "Multiple" and keeps everything else
  filter(
    county != "Multiple",
    organization_level == "School",
    test_subject == "Science",
    grade_level == "All Grades"
  ) %>%
  # drop any row with a missing value in the selected columns so the sums
  # below are not turned into NA
  na.omit() %>%
  group_by(county) %>%
  summarise(
    total_county_met_standard = sum(count_met_standard),
    total_county_to_test = sum(count_of_students_expected_to_test)
  ) %>%
  mutate(
    percent_met_per_county =
      total_county_met_standard / total_county_to_test * 100
  )

```

We now have the percent that met the standard for each county and need to work out what the state average is....

```{r}

# State average = unweighted mean of the per-county percentages.
# Three equivalent ways of computing the same single number are shown.

# 1. summarise to a one-row table, then extract the value
state_average <- county_only %>%
  summarise(state_average = mean(percent_met_per_county)) %>%
  pull(state_average)

# 2. base R: take the mean of the column directly
state_average2 <- mean(county_only[["percent_met_per_county"]])

# 3. as (1), but narrowing to the one column first
state_average3 <- county_only %>%
  select(percent_met_per_county) %>%
  summarise(state_average = mean(percent_met_per_county)) %>%
  pull(state_average)

```

Ok, now we need to make a column that compares each county value to the state average and some text to say if it is above or below...

```{r}
# Compare each county with the state average:
#   difference_to_state - county percentage minus state average, rounded to
#                         the nearest whole percentage point
#   above_below         - "above" / "below" / "equal", based on the sign of
#                         the rounded difference
# round() is called directly rather than through across(col, round, 0):
# passing extra arguments via across()'s `...` is deprecated in dplyr >= 1.1.0.
county_only_above_below_state <- county_only %>%
  mutate(
    difference_to_state = round(percent_met_per_county - state_average, 0),
    above_below = case_when(
      difference_to_state < 0 ~ "below",
      difference_to_state > 0 ~ "above",
      difference_to_state == 0 ~ "equal"
    )
  )

```

Join to our spatial data....

```{r}

# Attach the county statistics to the county polygons. After clean_names()
# the shapefile's county column is `countylabe`; it is matched against
# `county` in the results table. A left join keeps every polygon.
joined_data <- shape %>%
  clean_names() %>%
  left_join(county_only_above_below_state,
            by = c("countylabe" = "county"))
```


Let's map...

```{r}

# Optional OpenStreetMap basemap, left disabled. To use it, uncomment both
# pieces below so the basemap layers are drawn underneath the polygons.
#bbox_county <- joined_data %>%
#  st_bbox(.) %>% 
#  tmaptools::read_osm(., type = "osm", zoom = NULL)

#tm_shape(bbox_county)+
#  tm_rgb()+

# Choropleth of counties coloured by whether they sit above, below or equal
# to the state average, with a north arrow, scale bar and legend.
tm_shape(joined_data) +
  tm_polygons("above_below",
              # style = "pretty",
              palette = "Blues",
              midpoint = NA,
              # title = "Number of years",
              alpha = 0.5) +
  tm_compass(position = c("left", "bottom"), type = "arrow") +
  tm_scale_bar(position = c("left", "bottom")) +
  # title typo fixed: "avearge" -> "average"
  tm_layout(title = "Counties above or below state average for science in all grades",
            legend.position = c("right", "bottom"))


```