-
Notifications
You must be signed in to change notification settings - Fork 2
/
homework_2_file.Rmd
130 lines (98 loc) · 3.64 KB
/
homework_2_file.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
```{r}
# packages
library(tidyverse)
library(here)
library(sf)
library(janitor)
library(tmap)
```
```{r}
# Read the two inputs, both located relative to the project root via here().

# Assessment table: the literal string "NULL" in the CSV is parsed as NA.
report <- read_csv(
  file = here::here(
    "homework_2",
    "Report_Card_Assessment_Data_2018-19_School_Year.csv"
  ),
  na = "NULL"
)

# Washington counties shapefile, read as an sf object.
shape <- st_read(
  dsn = here::here(
    "homework_2",
    "Washington_Counties_with_Natural_Shoreline___washsh_area",
    "Washington_Counties_with_Natural_Shoreline___washsh_area.shp"
  )
)
```
# Task
Calculate the average percent of science students (in all grades) per county meeting the required standards.
We have:
* Count met standard
* Count expected to test
* test subject
Need to filter for science and mutate to work out % that have met
```{r}
# List every column of `report` next to its class, one row per column.
# across() replaces the superseded summarise_all(). Collapsing with paste()
# guarantees exactly one value per column even when a column carries several
# classes (e.g. date-times are c("POSIXct", "POSIXt")), which would otherwise
# make summarise() return more than one row.
Datatypelist <- report %>%
  summarise(
    across(everything(), function(x) paste(class(x), collapse = ", "))
  ) %>%
  pivot_longer(
    everything(),
    names_to = "All_variables",
    values_to = "Variable_class"
  )

# Print the lookup table so the column types can be inspected.
Datatypelist
# Build one row per county with the share of students meeting the science
# standard:
#   1. keep only the columns needed for the calculation;
#   2. keep school-level, Science, "All Grades" rows and drop the "Multiple"
#      pseudo-county;
#   3. drop rows with a missing value in any kept column, so the sums below
#      need no na.rm argument;
#   4. total the counts per county and convert them to a percentage.
county_only <- report %>%
  clean_names() %>%
  select(
    county,
    organization_level,
    test_subject,
    count_met_standard,
    count_of_students_expected_to_test,
    grade_level
  ) %>%
  filter(
    county != "Multiple",
    organization_level == "School",
    test_subject == "Science",
    grade_level == "All Grades"
  ) %>%
  na.omit() %>%
  group_by(county) %>%
  summarise(
    total_county_met_standard = sum(count_met_standard),
    total_county_to_test = sum(count_of_students_expected_to_test)
  ) %>%
  mutate(
    percent_met_per_county =
      (total_county_met_standard / total_county_to_test) * 100
  )
```
We now have the percent that met the standard in each county, and need to work out what the state average is...
```{r}
# State benchmark: the (unweighted) mean of the county percentages.
# The same number is computed three equivalent ways for illustration.

# 1. summarise to a one-row tibble, then extract the value
state_average <- county_only %>%
  summarise(state_average = mean(percent_met_per_county)) %>%
  pull(state_average)

# 2. base R: take the mean of the column vector directly
state_average2 <- mean(county_only[["percent_met_per_county"]])

# 3. as in 1, but narrowing to the single column first
state_average3 <- county_only %>%
  select(percent_met_per_county) %>%
  summarise(state_average = mean(percent_met_per_county)) %>%
  pull(state_average)
```
Ok, now we need to make a column that compares each county value to the state average and some text to say if it is above or below...
```{r}
# Compare each county with the state average and label the result.
# The difference is rounded to a whole number before it is classified, so a
# county whose rounded difference is 0 is labelled "equal".
# round() is called directly: passing extra arguments through across(...)
# (the previous `across(difference_to_state, round, 0)`) is deprecated since
# dplyr 1.1.0 and emits a warning.
county_only_above_below_state <- county_only %>%
  mutate(
    difference_to_state = round(percent_met_per_county - state_average, 0),
    above_below = case_when(
      difference_to_state < 0 ~ "below",
      difference_to_state > 0 ~ "above",
      difference_to_state == 0 ~ "equal"
    )
  )
```
Join to our spatial data....
```{r}
# Attach the county results to the shapefile rows. A left join keeps every
# row of `shape`; rows with no matching county get NA in the result columns.
# `countylabe` is the cleaned shapefile column matched against `county`
# (presumably the county name label - confirm against the shapefile).
joined_data <- left_join(
  clean_names(shape),
  county_only_above_below_state,
  by = c("countylabe" = "county")
)
```
Let's map...
```{r}
# Optional OpenStreetMap basemap (disabled):
# bbox_county <- joined_data %>%
#   st_bbox(.) %>%
#   tmaptools::read_osm(., type = "osm", zoom = NULL)
# tm_shape(bbox_county) +
#   tm_rgb() +

# Map each county by its above/below/equal label, with a north arrow and a
# scale bar in the bottom-left corner and the legend in the bottom-right.
# The title typo ("avearge") is corrected to "average".
tm_shape(joined_data) +
  tm_polygons(
    "above_below",
    # style = "pretty",
    palette = "Blues",
    midpoint = NA,
    # title = "Number of years",
    alpha = 0.5
  ) +
  tm_compass(position = c("left", "bottom"), type = "arrow") +
  tm_scale_bar(position = c("left", "bottom")) +
  tm_layout(
    title = "Counties above or below state average for science in all grades",
    legend.position = c("right", "bottom")
  )
```