answering Q1:
covid = read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
state.of.interest = "California"
state_new_cases = covid %>%
filter(state == state.of.interest) %>%
group_by(county) %>%
mutate(new_daily_cases = cases - lag(cases)) %>%
ungroup()
most_cumulative_cases = state_new_cases %>%
group_by(county) %>%
summarise(cum_cases = sum(cases)) %>%
ungroup() %>%
slice_max(cum_cases, n = 5)
most_new_cases = state_new_cases %>%
filter(date == max(date)) %>%
slice_max(new_daily_cases, n = 5) %>%
select(county, new_daily_cases)
knitr::kable(most_cumulative_cases, caption = "5 Counties with the Most Cumulative Cases", col.names = c("County", "Cumulative Cases"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
5 Counties with the Most Cumulative Cases
County
|
Cumulative Cases
|
Los Angeles
|
18,408,002
|
Riverside
|
3,611,739
|
Orange
|
3,293,539
|
San Bernardino
|
2,972,488
|
San Diego
|
2,832,227
|
knitr::kable(most_new_cases, caption = "5 Counties with the Most New Cases", col.names = c("County", "New Daily Cases"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
5 Counties with the Most New Cases
County
|
New Daily Cases
|
Los Angeles
|
809
|
San Diego
|
265
|
Orange
|
185
|
Fresno
|
159
|
San Bernardino
|
156
|
PopulationEstimates <- read_excel("~/github/geog-176A-labs/data/PopulationEstimates.xls", skip = 2)
CA_county_pop = PopulationEstimates %>%
filter(State == "CA") %>%
select(state = State, county = Area_Name, fips = FIPStxt, pop_19 = POP_ESTIMATE_2019)
CA_covid = covid %>%
filter(state == "California") %>%
group_by(county, fips) %>%
summarise(CA_covid_cases = sum(cases)) %>%
ungroup()
CA_covid_new = covid %>%
filter(state == "California", date == max(date))
CA_per_cap = inner_join(CA_covid, CA_county_pop, by = "fips") %>%
select(county.x, CA_covid_cases, pop_19) %>%
mutate(most_per_cap = (CA_covid_cases / pop_19)) %>%
slice_max(most_per_cap, n = 5)
NEW_per_cap = left_join(CA_covid_new, CA_county_pop, by = "fips") %>%
summarise(county.x, cases, pop_19, new_per_cap = (cases / pop_19)) %>%
ungroup() %>%
slice_max(new_per_cap, n = 5)
knitr::kable(CA_per_cap, caption = "5 Counties with the Most Cumulative Cases Per Capita", col.names = c("County", "Cumulative Cases", "Population", "Cases per Capita"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
5 Counties with the Most Cumulative Cases Per Capita
County
|
Cumulative Cases
|
Population
|
Cases per Capita
|
Imperial
|
868,946
|
181,215
|
4.795111
|
Kings
|
423,990
|
152,940
|
2.772264
|
Tulare
|
908,771
|
466,195
|
1.949337
|
Kern
|
1,660,479
|
900,202
|
1.844563
|
Los Angeles
|
18,408,002
|
10,039,107
|
1.833629
|
knitr::kable(NEW_per_cap, caption = "5 Counties with the Most New Cases Per Capita", col.names = c("County", "New Cases", "Population", "New Cases per Capita"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
5 Counties with the Most New Cases Per Capita
County
|
New Cases
|
Population
|
New Cases per Capita
|
Imperial
|
11,274
|
181,215
|
0.0622134
|
Kings
|
7,097
|
152,940
|
0.0464038
|
Kern
|
30,735
|
900,202
|
0.0341423
|
Tulare
|
15,114
|
466,195
|
0.0324199
|
Merced
|
8,541
|
277,680
|
0.0307584
|
CA_covid_14 = covid %>%
filter(state == "California", date > max(date) - 14)
CA_14_per100 = CA_covid_14 %>%
group_by(date) %>%
summarize(CA_14_cases = sum(cases)) %>%
mutate(CA_14_100 = (CA_14_cases / 100000))
knitr::kable(CA_14_per100, caption = "Total New Cases in the Last 14 Days per 100,000 People", col.names = c("Date", "Daily Total New Cases", "Daily New Cases per 100,000"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
Total New Cases in the Last 14 Days per 100,000 People
Date
|
Daily Total New Cases
|
Daily New Cases per 100,000
|
2020-08-31
|
712,541
|
7.12541
|
2020-09-01
|
716,628
|
7.16628
|
2020-09-02
|
722,035
|
7.22035
|
2020-09-03
|
727,398
|
7.27398
|
2020-09-04
|
732,691
|
7.32691
|
2020-09-05
|
737,073
|
7.37073
|
2020-09-06
|
740,233
|
7.40233
|
2020-09-07
|
742,689
|
7.42689
|
2020-09-08
|
746,113
|
7.46113
|
2020-09-09
|
749,196
|
7.49196
|
2020-09-10
|
753,017
|
7.53017
|
2020-09-11
|
757,125
|
7.57125
|
2020-09-12
|
760,581
|
7.60581
|
2020-09-13
|
763,389
|
7.63389
|
Result:
total_cases = state_new_cases %>%
filter(date == max(date)) %>%
select(county, cases) %>%
arrange(-cases)
total_new_cases = state_new_cases %>%
group_by(date) %>%
summarize(new_total_cases = sum(cases)) %>%
ungroup() %>%
arrange(-new_total_cases)
safe_counties = inner_join(CA_covid, CA_county_pop, by = "fips") %>%
select(county.x, CA_covid_cases, pop_19) %>%
mutate(most_per_cap = CA_covid_cases / pop_19) %>%
arrange(most_per_cap)
knitr::kable(total_cases, caption = "Total Number of Cases", col.names = c("County", "Total Cases"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
Total Number of Cases
County
|
Total Cases
|
Los Angeles
|
253,985
|
Riverside
|
55,073
|
Orange
|
52,121
|
San Bernardino
|
50,699
|
San Diego
|
42,742
|
Kern
|
30,735
|
Fresno
|
27,076
|
Sacramento
|
20,183
|
Alameda
|
20,012
|
Santa Clara
|
19,549
|
San Joaquin
|
19,108
|
Stanislaus
|
15,976
|
Contra Costa
|
15,351
|
Tulare
|
15,114
|
Ventura
|
11,759
|
Imperial
|
11,274
|
San Francisco
|
10,305
|
San Mateo
|
9,077
|
Monterey
|
8,992
|
Santa Barbara
|
8,642
|
Merced
|
8,541
|
Kings
|
7,097
|
Sonoma
|
6,701
|
Marin
|
6,464
|
Solano
|
5,825
|
Madera
|
4,150
|
Placer
|
3,316
|
San Luis Obispo
|
3,222
|
Yolo
|
2,677
|
Butte
|
2,629
|
Santa Cruz
|
2,043
|
Sutter
|
1,564
|
Napa
|
1,557
|
San Benito
|
1,238
|
Yuba
|
1,057
|
El Dorado
|
1,034
|
Mendocino
|
789
|
Lassen
|
727
|
Shasta
|
612
|
Glenn
|
517
|
Colusa
|
489
|
Nevada
|
482
|
Tehama
|
471
|
Humboldt
|
447
|
Lake
|
393
|
Calaveras
|
298
|
Amador
|
277
|
Tuolumne
|
213
|
Inyo
|
177
|
Mono
|
163
|
Siskiyou
|
155
|
Del Norte
|
129
|
Mariposa
|
75
|
Plumas
|
42
|
Modoc
|
21
|
Trinity
|
16
|
Sierra
|
6
|
Alpine
|
2
|
knitr::kable(total_new_cases, caption = "Total Number of New Cases", col.names = c("Date", "Total New Cases"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
Total Number of New Cases
Date
|
Total New Cases
|
2020-09-13
|
763,389
|
2020-09-12
|
760,581
|
2020-09-11
|
757,125
|
2020-09-10
|
753,017
|
2020-09-09
|
749,196
|
2020-09-08
|
746,113
|
2020-09-07
|
742,689
|
2020-09-06
|
740,233
|
2020-09-05
|
737,073
|
2020-09-04
|
732,691
|
2020-09-03
|
727,398
|
2020-09-02
|
722,035
|
2020-09-01
|
716,628
|
2020-08-31
|
712,541
|
2020-08-30
|
706,589
|
2020-08-29
|
702,499
|
2020-08-28
|
698,389
|
2020-08-27
|
692,962
|
2020-08-26
|
687,612
|
2020-08-25
|
682,320
|
2020-08-24
|
676,236
|
2020-08-23
|
669,944
|
2020-08-22
|
665,325
|
2020-08-21
|
659,991
|
2020-08-20
|
653,401
|
2020-08-19
|
646,742
|
2020-08-18
|
640,499
|
2020-08-17
|
634,991
|
2020-08-16
|
628,508
|
2020-08-15
|
621,981
|
2020-08-14
|
613,243
|
2020-08-13
|
603,212
|
2020-08-12
|
595,097
|
2020-08-11
|
586,078
|
2020-08-10
|
574,267
|
2020-08-09
|
563,244
|
2020-08-08
|
556,158
|
2020-08-07
|
548,142
|
2020-08-06
|
541,013
|
2020-08-05
|
532,776
|
2020-08-04
|
527,258
|
2020-08-03
|
522,235
|
2020-08-02
|
515,937
|
2020-08-01
|
509,507
|
2020-07-31
|
502,273
|
2020-07-30
|
494,269
|
2020-07-29
|
486,039
|
2020-07-28
|
474,951
|
2020-07-27
|
467,103
|
2020-07-26
|
459,338
|
2020-07-25
|
453,327
|
2020-07-24
|
443,096
|
2020-07-23
|
433,175
|
2020-07-22
|
422,528
|
2020-07-21
|
410,366
|
2020-07-20
|
400,195
|
2020-07-19
|
391,460
|
2020-07-18
|
383,194
|
2020-07-17
|
374,922
|
2020-07-16
|
364,761
|
2020-07-15
|
355,497
|
2020-07-14
|
346,593
|
2020-07-13
|
336,206
|
2020-07-12
|
327,676
|
2020-07-11
|
320,030
|
2020-07-10
|
311,505
|
2020-07-09
|
303,516
|
2020-07-08
|
296,304
|
2020-07-07
|
287,766
|
2020-07-06
|
277,869
|
2020-07-05
|
271,587
|
2020-07-04
|
265,176
|
2020-07-03
|
256,298
|
2020-07-02
|
248,198
|
2020-07-01
|
239,764
|
2020-06-30
|
232,153
|
2020-06-29
|
223,995
|
2020-06-28
|
216,955
|
2020-06-27
|
211,453
|
2020-06-26
|
207,027
|
2020-06-25
|
201,413
|
2020-06-24
|
195,889
|
2020-06-23
|
191,039
|
2020-06-22
|
184,620
|
2020-06-21
|
178,807
|
2020-06-20
|
174,854
|
2020-06-19
|
170,843
|
2020-06-18
|
167,135
|
2020-06-17
|
163,381
|
2020-06-16
|
159,131
|
2020-06-15
|
155,662
|
2020-06-14
|
152,953
|
2020-06-13
|
150,434
|
2020-06-12
|
147,285
|
2020-06-11
|
143,709
|
2020-06-10
|
140,139
|
2020-06-09
|
137,245
|
2020-06-08
|
134,287
|
2020-06-07
|
131,997
|
2020-06-06
|
129,147
|
2020-06-05
|
126,510
|
2020-06-04
|
122,917
|
2020-06-03
|
120,407
|
2020-06-02
|
118,081
|
2020-06-01
|
115,643
|
2020-05-31
|
113,114
|
2020-05-30
|
110,100
|
2020-05-29
|
107,043
|
2020-05-28
|
104,071
|
2020-05-27
|
101,873
|
2020-05-26
|
99,924
|
2020-05-25
|
97,017
|
2020-05-24
|
94,743
|
2020-05-23
|
92,815
|
2020-05-22
|
90,801
|
2020-05-21
|
88,488
|
2020-05-20
|
86,125
|
2020-05-19
|
83,981
|
2020-05-18
|
81,943
|
2020-05-17
|
80,366
|
2020-05-16
|
78,933
|
2020-05-15
|
77,015
|
2020-05-14
|
74,947
|
2020-05-13
|
73,218
|
2020-05-12
|
71,150
|
2020-05-11
|
69,514
|
2020-05-10
|
68,051
|
2020-05-09
|
66,824
|
2020-05-08
|
64,616
|
2020-05-07
|
62,481
|
2020-05-06
|
60,787
|
2020-05-05
|
58,848
|
2020-05-04
|
56,333
|
2020-05-03
|
55,072
|
2020-05-02
|
53,753
|
2020-05-01
|
52,318
|
2020-04-30
|
50,470
|
2020-04-29
|
48,904
|
2020-04-28
|
46,570
|
2020-04-27
|
45,208
|
2020-04-26
|
43,691
|
2020-04-25
|
42,590
|
2020-04-24
|
41,368
|
2020-04-23
|
39,534
|
2020-04-22
|
37,573
|
2020-04-21
|
35,844
|
2020-04-20
|
33,862
|
2020-04-19
|
31,544
|
2020-04-18
|
30,829
|
2020-04-17
|
29,398
|
2020-04-16
|
28,142
|
2020-04-15
|
27,107
|
2020-04-14
|
25,758
|
2020-04-13
|
24,334
|
2020-04-12
|
23,323
|
2020-04-11
|
22,421
|
2020-04-10
|
21,366
|
2020-04-09
|
20,191
|
2020-04-08
|
19,043
|
2020-04-07
|
17,540
|
2020-04-06
|
16,361
|
2020-04-05
|
15,202
|
2020-04-04
|
13,796
|
2020-04-03
|
12,569
|
2020-04-02
|
11,190
|
2020-04-01
|
9,857
|
2020-03-31
|
8,583
|
2020-03-30
|
7,421
|
2020-03-29
|
6,321
|
2020-03-28
|
5,566
|
2020-03-27
|
4,915
|
2020-03-26
|
4,060
|
2020-03-25
|
3,183
|
2020-03-24
|
2,644
|
2020-03-23
|
2,240
|
2020-03-22
|
1,851
|
2020-03-21
|
1,544
|
2020-03-20
|
1,283
|
2020-03-19
|
1,067
|
2020-03-18
|
893
|
2020-03-17
|
732
|
2020-03-16
|
588
|
2020-03-15
|
478
|
2020-03-14
|
381
|
2020-03-13
|
320
|
2020-03-12
|
252
|
2020-03-11
|
202
|
2020-03-10
|
179
|
2020-03-09
|
172
|
2020-03-08
|
112
|
2020-03-07
|
100
|
2020-03-06
|
81
|
2020-03-05
|
67
|
2020-03-04
|
55
|
2020-03-03
|
45
|
2020-03-02
|
38
|
2020-03-01
|
33
|
2020-02-29
|
28
|
2020-02-28
|
27
|
2020-02-26
|
26
|
2020-02-27
|
26
|
2020-02-24
|
11
|
2020-02-25
|
11
|
2020-02-21
|
9
|
2020-02-22
|
9
|
2020-02-23
|
9
|
2020-02-20
|
8
|
2020-02-10
|
7
|
2020-02-11
|
7
|
2020-02-12
|
7
|
2020-02-13
|
7
|
2020-02-14
|
7
|
2020-02-15
|
7
|
2020-02-16
|
7
|
2020-02-17
|
7
|
2020-02-18
|
7
|
2020-02-19
|
7
|
2020-02-02
|
6
|
2020-02-03
|
6
|
2020-02-04
|
6
|
2020-02-05
|
6
|
2020-02-06
|
6
|
2020-02-07
|
6
|
2020-02-08
|
6
|
2020-02-09
|
6
|
2020-01-31
|
3
|
2020-02-01
|
3
|
2020-01-26
|
2
|
2020-01-27
|
2
|
2020-01-28
|
2
|
2020-01-29
|
2
|
2020-01-30
|
2
|
2020-01-25
|
1
|
knitr::kable(safe_counties, caption = "List of Safe Counties", col.names = c("County", "Total Cases", "Population", "Total Cases per Capita"), format.args = list(big.mark = ",")) %>%
kable_styling("striped", full_width = TRUE, font_size = 11)
List of Safe Counties
County
|
Total Cases
|
Population
|
Total Cases per Capita
|
Modoc
|
382
|
8,841
|
0.0432078
|
Trinity
|
679
|
12,285
|
0.0552707
|
Sierra
|
286
|
3,005
|
0.0951747
|
Plumas
|
2,393
|
18,807
|
0.1272399
|
Siskiyou
|
7,622
|
43,539
|
0.1750614
|
Shasta
|
33,700
|
180,080
|
0.1871390
|
Tuolumne
|
10,829
|
54,478
|
0.1987775
|
Humboldt
|
28,240
|
135,558
|
0.2083241
|
Alpine
|
280
|
1,129
|
0.2480071
|
Calaveras
|
12,989
|
45,905
|
0.2829539
|
Nevada
|
28,495
|
99,755
|
0.2856498
|
Lake
|
18,431
|
64,386
|
0.2862579
|
El Dorado
|
57,750
|
192,843
|
0.2994664
|
Mariposa
|
5,163
|
17,203
|
0.3001221
|
Amador
|
12,057
|
39,752
|
0.3033055
|
Tehama
|
20,465
|
65,084
|
0.3144398
|
Del Norte
|
9,065
|
27,812
|
0.3259384
|
Mendocino
|
34,021
|
86,749
|
0.3921774
|
Santa Cruz
|
108,695
|
273,213
|
0.3978398
|
Butte
|
95,465
|
219,186
|
0.4355433
|
Placer
|
180,424
|
398,329
|
0.4529522
|
Inyo
|
8,610
|
18,039
|
0.4772992
|
Santa Clara
|
1,132,263
|
1,927,852
|
0.5873184
|
Yuba
|
46,561
|
78,668
|
0.5918671
|
San Luis Obispo
|
175,100
|
283,111
|
0.6184853
|
Napa
|
85,313
|
137,744
|
0.6193591
|
Sacramento
|
1,002,040
|
1,552,058
|
0.6456202
|
Sonoma
|
322,561
|
494,336
|
0.6525137
|
Yolo
|
145,881
|
220,500
|
0.6615918
|
Contra Costa
|
816,257
|
1,153,526
|
0.7076191
|
Alameda
|
1,228,491
|
1,671,329
|
0.7350384
|
Solano
|
336,017
|
447,643
|
0.7506361
|
Sutter
|
73,365
|
96,971
|
0.7565664
|
San Mateo
|
616,970
|
766,573
|
0.8048418
|
Ventura
|
684,879
|
846,006
|
0.8095439
|
San Francisco
|
723,507
|
881,549
|
0.8207224
|
Mono
|
12,171
|
14,444
|
0.8426336
|
San Diego
|
2,832,227
|
3,338,330
|
0.8483964
|
Glenn
|
25,861
|
28,393
|
0.9108231
|
San Benito
|
62,376
|
62,808
|
0.9931219
|
Monterey
|
445,704
|
434,061
|
1.0268234
|
Orange
|
3,293,539
|
3,175,692
|
1.0371091
|
Colusa
|
24,493
|
21,547
|
1.1367244
|
Madera
|
191,712
|
157,327
|
1.2185575
|
Santa Barbara
|
597,872
|
446,499
|
1.3390220
|
San Joaquin
|
1,024,851
|
762,148
|
1.3446876
|
San Bernardino
|
2,972,488
|
2,180,085
|
1.3634734
|
Fresno
|
1,403,937
|
999,101
|
1.4052003
|
Riverside
|
3,611,739
|
2,470,546
|
1.4619193
|
Lassen
|
45,008
|
30,573
|
1.4721486
|
Merced
|
409,128
|
277,680
|
1.4733794
|
Stanislaus
|
815,215
|
550,660
|
1.4804326
|
Marin
|
420,750
|
258,826
|
1.6256095
|
Los Angeles
|
18,408,002
|
10,039,107
|
1.8336294
|
Kern
|
1,660,479
|
900,202
|
1.8445627
|
Tulare
|
908,771
|
466,195
|
1.9493367
|
Kings
|
423,990
|
152,940
|
2.7722636
|
Imperial
|
868,946
|
181,215
|
4.7951108
|
Answering Q2:
state_level = covid %>%
filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
group_by(date, state) %>%
summarize(cases = sum(cases)) %>%
ungroup() %>%
group_by(state) %>%
mutate(new_cases = cases - lag(cases)) %>%
mutate(roll_7 = rollmean(new_cases, 7, na.pad = TRUE, align = "left")) %>%
filter(new_cases > 0)
ggplot(state_level, aes(x = date)) +
geom_col(aes(y = new_cases), col = "#b9a9d6", fill = "#dac8fa") +
geom_line(aes(y = roll_7), col = "#8a7ba6", size = .5) +
ggthemes::theme_clean() +
labs(title = paste("New Cases")) +
theme(plot.title = element_text(size = 11, face = 'bold'), plot.background = element_rect(fill = "white"), panel.background = element_rect(fill = "white")) +
facet_grid(~state, scales = "free_y")
CAS_PER_CAP = PopulationEstimates %>%
select(POP_ESTIMATE_2019, Area_Name) %>%
right_join(state_level, by = c("Area_Name" = "state")) %>%
mutate(cases_per_cap = (new_cases / POP_ESTIMATE_2019)) %>%
mutate(roll_7 = rollmean(cases_per_cap, 7, na.pad = TRUE, align = "left")) %>%
filter(cases_per_cap > 0)
ggplot(CAS_PER_CAP, aes(x = date)) +
geom_col(aes(y = cases_per_cap), col = "#b9a9d6", fill = "#dac8fa") +
geom_line(aes(y = roll_7), col = "#8a7ba6", size = .5) +
ggthemes::theme_clean() +
labs(title = paste("New Cases Per Capita")) +
theme(plot.title = element_text(size = 11, face = 'bold'), plot.background = element_rect(fill = "white"), panel.background = element_rect(fill = "white")) +
facet_grid(~Area_Name, scales = "free_y")