This is an exploratory data analysis for educational purposes. The analysis is carried out with data related to COVID-19 reported by various countries. The data is obtained from the Website: https://ourworldindata.org/coronavirus-source-data. The site’s data set is constantly updated. The results of the analysis changes according to the day the code is executed.
The data set is loaded directly from the Website to obtain information updated according to the execution day of the code.
covid.df <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
Required packages are loaded.
library(ggplot2)
library(knitr)
library(reshape2)
library(kableExtra)
#library(dplyr)
library(tidyverse)
library(lubridate)
library(gmodels)
plotscaption <- "oscarcastrolopez.github.io"
str(covid.df)
## 'data.frame': 60098 obs. of 50 variables:
## $ iso_code : Factor w/ 192 levels "","AFG","AGO",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ continent : Factor w/ 7 levels "","Africa","Asia",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ location : Factor w/ 192 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Factor w/ 336 levels "2020-01-01","2020-01-02",..: 23 24 25 26 27 28 29 30 31 32 ...
## $ total_cases : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_cases : num 0 0 0 0 0 0 0 0 0 0 ...
## $ new_cases_smoothed : num NA NA NA NA NA 0 0 0 0 0 ...
## $ total_deaths : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths : num 0 0 0 0 0 0 0 0 0 0 ...
## $ new_deaths_smoothed : num NA NA NA NA NA 0 0 0 0 0 ...
## $ total_cases_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_cases_per_million : num 0 0 0 0 0 0 0 0 0 0 ...
## $ new_cases_smoothed_per_million : num NA NA NA NA NA 0 0 0 0 0 ...
## $ total_deaths_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths_per_million : num 0 0 0 0 0 0 0 0 0 0 ...
## $ new_deaths_smoothed_per_million : num NA NA NA NA NA 0 0 0 0 0 ...
## $ reproduction_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ icu_patients : num NA NA NA NA NA NA NA NA NA NA ...
## $ icu_patients_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ hosp_patients : num NA NA NA NA NA NA NA NA NA NA ...
## $ hosp_patients_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_icu_admissions : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_icu_admissions_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_hosp_admissions : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_hosp_admissions_per_million: num NA NA NA NA NA NA NA NA NA NA ...
## $ total_tests : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests : num NA NA NA NA NA NA NA NA NA NA ...
## $ total_tests_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_smoothed : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_smoothed_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ positive_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ tests_per_case : num NA NA NA NA NA NA NA NA NA NA ...
## $ tests_units : Factor w/ 5 levels "","people tested",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ stringency_index : num 0 0 0 0 0 0 0 0 0 0 ...
## $ population : num 38928341 38928341 38928341 38928341 38928341 ...
## $ population_density : num 54.4 54.4 54.4 54.4 54.4 ...
## $ median_age : num 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 ...
## $ aged_65_older : num 2.58 2.58 2.58 2.58 2.58 ...
## $ aged_70_older : num 1.34 1.34 1.34 1.34 1.34 ...
## $ gdp_per_capita : num 1804 1804 1804 1804 1804 ...
## $ extreme_poverty : num NA NA NA NA NA NA NA NA NA NA ...
## $ cardiovasc_death_rate : num 597 597 597 597 597 ...
## $ diabetes_prevalence : num 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 ...
## $ female_smokers : num NA NA NA NA NA NA NA NA NA NA ...
## $ male_smokers : num NA NA NA NA NA NA NA NA NA NA ...
## $ handwashing_facilities : num 37.7 37.7 37.7 37.7 37.7 ...
## $ hospital_beds_per_thousand : num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
## $ life_expectancy : num 64.8 64.8 64.8 64.8 64.8 ...
## $ human_development_index : num 0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 ...
A detailed description of each variable of the data set can be found on the following Website: https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-codebook.csv.
The variable date is class factor is converted to class date.
covid.df$date <- as.Date(covid.df$date, format("%Y-%m-%d"))
levels(covid.df$location)
## [1] "Afghanistan" "Albania"
## [3] "Algeria" "Andorra"
## [5] "Angola" "Antigua and Barbuda"
## [7] "Argentina" "Armenia"
## [9] "Australia" "Austria"
## [11] "Azerbaijan" "Bahamas"
## [13] "Bahrain" "Bangladesh"
## [15] "Barbados" "Belarus"
## [17] "Belgium" "Belize"
## [19] "Benin" "Bhutan"
## [21] "Bolivia" "Bosnia and Herzegovina"
## [23] "Botswana" "Brazil"
## [25] "Brunei" "Bulgaria"
## [27] "Burkina Faso" "Burundi"
## [29] "Cambodia" "Cameroon"
## [31] "Canada" "Cape Verde"
## [33] "Central African Republic" "Chad"
## [35] "Chile" "China"
## [37] "Colombia" "Comoros"
## [39] "Congo" "Costa Rica"
## [41] "Cote d'Ivoire" "Croatia"
## [43] "Cuba" "Cyprus"
## [45] "Czech Republic" "Democratic Republic of Congo"
## [47] "Denmark" "Djibouti"
## [49] "Dominica" "Dominican Republic"
## [51] "Ecuador" "Egypt"
## [53] "El Salvador" "Equatorial Guinea"
## [55] "Eritrea" "Estonia"
## [57] "Ethiopia" "Fiji"
## [59] "Finland" "France"
## [61] "Gabon" "Gambia"
## [63] "Georgia" "Germany"
## [65] "Ghana" "Greece"
## [67] "Grenada" "Guatemala"
## [69] "Guinea" "Guinea-Bissau"
## [71] "Guyana" "Haiti"
## [73] "Honduras" "Hong Kong"
## [75] "Hungary" "Iceland"
## [77] "India" "Indonesia"
## [79] "International" "Iran"
## [81] "Iraq" "Ireland"
## [83] "Israel" "Italy"
## [85] "Jamaica" "Japan"
## [87] "Jordan" "Kazakhstan"
## [89] "Kenya" "Kosovo"
## [91] "Kuwait" "Kyrgyzstan"
## [93] "Laos" "Latvia"
## [95] "Lebanon" "Lesotho"
## [97] "Liberia" "Libya"
## [99] "Liechtenstein" "Lithuania"
## [101] "Luxembourg" "Macedonia"
## [103] "Madagascar" "Malawi"
## [105] "Malaysia" "Maldives"
## [107] "Mali" "Malta"
## [109] "Marshall Islands" "Mauritania"
## [111] "Mauritius" "Mexico"
## [113] "Moldova" "Monaco"
## [115] "Mongolia" "Montenegro"
## [117] "Morocco" "Mozambique"
## [119] "Myanmar" "Namibia"
## [121] "Nepal" "Netherlands"
## [123] "New Zealand" "Nicaragua"
## [125] "Niger" "Nigeria"
## [127] "Norway" "Oman"
## [129] "Pakistan" "Palestine"
## [131] "Panama" "Papua New Guinea"
## [133] "Paraguay" "Peru"
## [135] "Philippines" "Poland"
## [137] "Portugal" "Qatar"
## [139] "Romania" "Russia"
## [141] "Rwanda" "Saint Kitts and Nevis"
## [143] "Saint Lucia" "Saint Vincent and the Grenadines"
## [145] "San Marino" "Sao Tome and Principe"
## [147] "Saudi Arabia" "Senegal"
## [149] "Serbia" "Seychelles"
## [151] "Sierra Leone" "Singapore"
## [153] "Slovakia" "Slovenia"
## [155] "Solomon Islands" "Somalia"
## [157] "South Africa" "South Korea"
## [159] "South Sudan" "Spain"
## [161] "Sri Lanka" "Sudan"
## [163] "Suriname" "Swaziland"
## [165] "Sweden" "Switzerland"
## [167] "Syria" "Taiwan"
## [169] "Tajikistan" "Tanzania"
## [171] "Thailand" "Timor"
## [173] "Togo" "Trinidad and Tobago"
## [175] "Tunisia" "Turkey"
## [177] "Uganda" "Ukraine"
## [179] "United Arab Emirates" "United Kingdom"
## [181] "United States" "Uruguay"
## [183] "Uzbekistan" "Vanuatu"
## [185] "Vatican" "Venezuela"
## [187] "Vietnam" "Western Sahara"
## [189] "World" "Yemen"
## [191] "Zambia" "Zimbabwe"
levels(covid.df$continent)
## [1] "" "Africa" "Asia" "Europe"
## [5] "North America" "Oceania" "South America"
maxdays <- max(table(covid.df$location))
print(paste("Maximum reported days:",maxdays))
## [1] "Maximum reported days: 336"
names(table(covid.df$location)[table(covid.df$location)==maxdays])
## [1] "Mexico"
startdate <- min(covid.df$date)
enddate <- max(covid.df$date)
todayformatted <- format(Sys.Date(), "%A, %B %d, %Y")
todayformatted <- paste(toupper(substr(todayformatted, 1, 1)),
substr(todayformatted, 2, nchar(todayformatted)),
sep="")
print(paste("Starts:", format(startdate, "%A, %B, %d of %Y"),
"Ends:", format(enddate, "%A, %B, %d of %Y")))
## [1] "Starts: Wednesday, January, 01 of 2020 Ends: Tuesday, December, 01 of 2020"
From the original dataframe, only the rows where date is equal to the maximum date (or compilation date) are obtained. Using this filter only the updated totals or accumulated values of each country are obtained. Additionally, countries with less than a million inhabitants are discarded. Some countries with small population have very high statistics per million of inhabitants.
The rankings are created with the following variables:
# Get one row of each country with the updated totals
covid.total.df <- covid.df[covid.df$date== enddate,]
# Filter countries with less than 1 millón population
covid.total.df <- covid.total.df[covid.total.df$population >= 1000000,]
covid.total.df$location <- droplevels.factor(covid.total.df$location)
print(names(covid.total.df)[c(5,7,9,11)])
## [1] "total_cases" "new_cases_smoothed"
## [3] "new_deaths" "total_cases_per_million"
After discarding the countries that have less than a million inhabitants, the data set now has: 156 countries.
For each of the variables with total values, the 20 countries with the highest and lowest values are obtained. If México is not included in the top or bottom 20 it is added to the list indicating its ranking position. For each variable to be analyzed, the following is done in R:
# Data is ordered according to the total_cases column
ranking.total_cases <- covid.total.df[order(-covid.total.df$total_cases),]
# The row corresponding to World is removed
ranking.total_cases <- ranking.total_cases[ranking.total_cases$location != "World", ]
# A new column indicating the positionin the rank is added
ranking.total_cases$position <- 1:nrow(ranking.total_cases)
# Only columns of interest are kept
columnfilter <- c("position", "location", "total_cases")
bottom20.total_cases <- tail(ranking.total_cases[, columnfilter],20)
top20.total_cases <- head(ranking.total_cases[, columnfilter],20)
rm(ranking.total_cases)
rownames(top20.total_cases) <- c()
rownames(bottom20.total_cases) <- c()
mexrow <- which(top20.total_cases$location=='Mexico')
top20.total_cases$total_cases_formated <- formatC(top20.total_cases$total_cases,
format="f", big.mark=",", digits=0)
bottom20.total_cases$total_cases_formated <- formatC(bottom20.total_cases$total_cases,
format="f", big.mark=",", digits=0)
tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with most cases") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "float_left") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
kable(bottom20.total_cases[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with least cases") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "right")
Position | Country | Total cases |
---|---|---|
1 | United States | 13,721,304 |
2 | India | 9,462,809 |
3 | Brazil | 6,386,787 |
4 | Russia | 2,302,062 |
5 | France | 2,275,429 |
6 | Spain | 1,656,444 |
7 | United Kingdom | 1,647,230 |
8 | Italy | 1,620,901 |
9 | Argentina | 1,432,570 |
10 | Colombia | 1,324,792 |
11 | Mexico | 1,122,362 |
12 | Germany | 1,094,678 |
13 | Poland | 999,924 |
14 | Iran | 975,951 |
15 | Peru | 963,605 |
16 | South Africa | 792,299 |
17 | Ukraine | 765,117 |
18 | Turkey | 668,957 |
19 | Belgium | 579,212 |
20 | Iraq | 554,767 |
Position | Country | Total cases |
---|---|---|
137 | Guinea-Bissau | 2,441 |
138 | Sierra Leone | 2,413 |
139 | Yemen | 2,197 |
140 | Lesotho | 2,137 |
141 | New Zealand | 2,060 |
142 | Chad | 1,700 |
143 | Liberia | 1,595 |
144 | Niger | 1,586 |
145 | Vietnam | 1,351 |
146 | Mongolia | 812 |
147 | Burundi | 689 |
148 | Taiwan | 679 |
149 | Papua New Guinea | 669 |
150 | Eritrea | 577 |
151 | Tanzania | 509 |
152 | Mauritius | 505 |
153 | Cambodia | 329 |
154 | Laos | 39 |
155 | Timor | 30 |
156 | NA | NA |
ggplot(data=top20.total_cases, aes(x=reorder(paste(position, location),total_cases),
y=total_cases, fill=location))+
geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
ylab("Total cases of COVID-19") +
geom_text(aes(y=max(total_cases)+170000,
label=total_cases_formated,
fontface="bold"),
color="black")+
labs(title="Top 20 countries with most cases of COVID-19",
subtitle = todayformatted,
caption = plotscaption)+
scale_y_continuous(breaks=c(100000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
label=c("100k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
coord_flip() +
xlab("Countries") +
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
This ranking is done with American countries only. The data is obtained by filtering the rows where the continent variable is either “North America” or “South America”.
ranking.total_cases.america <- covid.total.df[covid.total.df$continent %in%
c("North America", "South America") ,]
ranking.total_cases.america <- ranking.total_cases.america[order(-ranking.total_cases.america$total_cases),]
ranking.total_cases.america$position <- 1:nrow(ranking.total_cases.america)
bottom20.total_cases.america <- tail(ranking.total_cases.america[, c("position", "location", "total_cases")],5)
top20.total_cases.america <- head(ranking.total_cases.america[, c("position", "location", "total_cases")],20)
rownames(bottom20.total_cases.america) <- c()
rownames(top20.total_cases.america) <- c()
mexrow <- which(ranking.total_cases.america$location=='Mexico')
rm(ranking.total_cases.america)
top20.total_cases.america$total_cases_formated <- formatC(top20.total_cases.america$total_cases,
format="f", big.mark=",", digits=0)
bottom20.total_cases.america$total_cases_formated <- formatC(bottom20.total_cases.america$total_cases,
format="f", big.mark=",", digits=0)
tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases.america[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries of the American Continent with most cases") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
Position | Country | Total cases |
---|---|---|
1 | United States | 13,721,304 |
2 | Brazil | 6,386,787 |
3 | Argentina | 1,432,570 |
4 | Colombia | 1,324,792 |
5 | Mexico | 1,122,362 |
6 | Peru | 963,605 |
7 | Chile | 552,864 |
8 | Canada | 387,052 |
9 | Ecuador | 193,673 |
10 | Panama | 167,311 |
11 | Bolivia | 144,810 |
12 | Dominican Republic | 144,302 |
13 | Costa Rica | 140,172 |
14 | Guatemala | 122,774 |
15 | Honduras | 108,253 |
16 | Venezuela | 102,621 |
17 | Paraguay | 83,479 |
18 | El Salvador | 39,130 |
19 | Jamaica | 10,810 |
20 | Haiti | 9,296 |
ggplot(data=top20.total_cases.america, aes(x=reorder(paste(position, location),total_cases),
y=total_cases, fill=location))+
geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
ylab("Total cases of COVID-19") +
geom_text(aes(y=max(total_cases)+170000,
label=format(total_cases, big.mark=","),
fontface="bold"),
color="black")+
labs(title="Top 20 of American countries with most cases of COVID-19",
subtitle = todayformatted,
caption = plotscaption)+
scale_y_continuous(breaks=c(100000, 250000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
label=c("100k", "250k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
coord_flip() +
xlab("Countries") +
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ranking.total_cases_per_million <- covid.total.df[order(-covid.total.df$total_cases_per_million),]
ranking.total_cases_per_million$position <- 1:nrow(ranking.total_cases_per_million)
columnfilter <- c("position", "location", "total_cases_per_million")
ranking.total_cases_per_million <- ranking.total_cases_per_million[, columnfilter]
ranking.total_cases_per_million <- na.omit(ranking.total_cases_per_million)
mexico.total_cases_per_million <- ranking.total_cases_per_million[
ranking.total_cases_per_million$location == "Mexico", ]
bottom20.total_cases_per_million <- tail(ranking.total_cases_per_million,20)
top20.total_cases_per_million <- head(ranking.total_cases_per_million,20)
mexico.total_cases_per_million <- mexico.total_cases_per_million[, columnfilter]
rm(ranking.total_cases_per_million)
rownames(top20.total_cases_per_million) <- c()
rownames(bottom20.total_cases_per_million) <- c()
rownames(mexico.total_cases_per_million) <- c()
top20.total_cases_per_million <- rbind(top20.total_cases_per_million, mexico.total_cases_per_million)
mexrow <- which(top20.total_cases_per_million$location=='Mexico')
top20.total_cases_per_million$total_cases_per_million_formated <- formatC(
top20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
bottom20.total_cases_per_million$total_cases_per_million_formated<- formatC(
bottom20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
tablecolnames <- c("Position", "country", "Cases pmi")
kable(top20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with most cases per million inhabitants (pmi)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "float_left") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
kable(bottom20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with least cases per million inhabitants (pmi)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "right")
Position | country | Cases pmi |
---|---|---|
1 | Bahrain | 51,209.37 |
2 | Belgium | 49,976.81 |
3 | Czech Republic | 49,348.67 |
4 | Qatar | 48,246.48 |
5 | Armenia | 45,884.67 |
6 | United States | 41,453.76 |
7 | Israel | 39,064.80 |
8 | Panama | 38,776.36 |
9 | Switzerland | 38,230.92 |
10 | Slovenia | 37,103.19 |
11 | Spain | 35,428.33 |
12 | Georgia | 34,930.28 |
13 | France | 34,859.91 |
14 | Kuwait | 33,483.17 |
15 | Croatia | 31,993.53 |
16 | Austria | 31,698.46 |
17 | Argentina | 31,696.99 |
18 | Netherlands | 31,288.76 |
19 | Macedonia | 30,212.92 |
20 | Brazil | 30,047.07 |
70 | Mexico | 8,705.02 |
Position | country | Cases pmi |
---|---|---|
137 | South Sudan | 277.92 |
138 | Benin | 248.70 |
139 | Mongolia | 247.69 |
140 | Mali | 235.15 |
141 | Eritrea | 162.70 |
142 | Democratic Republic of Congo | 143.58 |
143 | Burkina Faso | 140.22 |
144 | Chad | 103.50 |
145 | Papua New Guinea | 74.77 |
146 | Yemen | 73.66 |
147 | Niger | 65.52 |
148 | China | 64.61 |
149 | Burundi | 57.94 |
150 | Thailand | 57.68 |
151 | Taiwan | 28.51 |
152 | Timor | 22.75 |
153 | Cambodia | 19.68 |
154 | Vietnam | 13.88 |
155 | Tanzania | 8.52 |
156 | Laos | 5.36 |
ggplot(data=top20.total_cases_per_million,
aes(x=reorder(paste(position, location),total_cases_per_million),
y=total_cases_per_million, fill=location))+
geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
ylab("Total cases of COVID-19 per million inhabitants") +
geom_text(aes(y=max(total_cases_per_million)+1250,
label=total_cases_per_million_formated,
fontface="bold"),
color="black")+
labs(title="Top 20 of countries with most cases of COVID-19 per million inhabitants (+ México)",
subtitle = todayformatted,
caption = plotscaption)+
coord_flip() +
xlab("Countries") +
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ranking.total_deaths <- covid.total.df[order(-covid.total.df$total_deaths),]
ranking.total_deaths <- ranking.total_deaths[ranking.total_deaths$location != "World", ]
ranking.total_deaths$position <- 1:nrow(ranking.total_deaths)
columnfilter <- c("position", "location", "total_deaths")
ranking.total_deaths <- ranking.total_deaths[, columnfilter]
ranking.total_deaths <- na.omit(ranking.total_deaths)
bottom20.total_deaths <- tail(ranking.total_deaths,20)
top20.total_deaths <- head(ranking.total_deaths,20)
mexrow <- which(ranking.total_deaths$location=='Mexico')
rm(ranking.total_deaths)
rownames(top20.total_deaths) <- c()
rownames(bottom20.total_deaths) <- c()
top20.total_deaths$total_deaths_formated <- formatC(top20.total_deaths$total_deaths,
format="f", big.mark=",", digits=0)
bottom20.total_deaths$total_deaths_formated <- formatC(bottom20.total_deaths$total_deaths,
format="f", big.mark=",", digits=0)
tablecolnames <- c("Position", "Country", "Total deaths")
kable(top20.total_deaths[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with most deaths from COVID-19") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "float_left") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
kable(bottom20.total_deaths[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames, caption="Countries with least deaths from COVID-19") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "right")
Position | Country | Total deaths |
---|---|---|
1 | United States | 270,642 |
2 | Brazil | 173,817 |
3 | India | 137,621 |
4 | Mexico | 106,765 |
5 | United Kingdom | 59,148 |
6 | Italy | 56,361 |
7 | France | 52,821 |
8 | Iran | 48,628 |
9 | Spain | 45,511 |
10 | Russia | 40,050 |
11 | Argentina | 38,928 |
12 | Colombia | 36,934 |
13 | Peru | 35,966 |
14 | South Africa | 21,644 |
15 | Poland | 17,599 |
16 | Germany | 17,177 |
17 | Indonesia | 17,081 |
18 | Belgium | 16,786 |
19 | Chile | 15,430 |
20 | Turkey | 13,936 |
Position | Country | Total deaths |
---|---|---|
131 | Sierra Leone | 74 |
132 | Burkina Faso | 68 |
133 | Togo | 64 |
134 | Central African Republic | 63 |
135 | South Sudan | 61 |
136 | Gabon | 60 |
137 | Thailand | 60 |
138 | Rwanda | 49 |
139 | Guinea-Bissau | 44 |
140 | Lesotho | 44 |
141 | Benin | 43 |
142 | Vietnam | 35 |
143 | Botswana | 34 |
144 | Singapore | 29 |
145 | New Zealand | 25 |
146 | Tanzania | 21 |
147 | Mauritius | 10 |
148 | Papua New Guinea | 7 |
149 | Taiwan | 7 |
150 | Burundi | 1 |
ggplot(data=top20.total_deaths,
aes(x=reorder(paste(position, location),total_deaths),
y=total_deaths, fill=location))+
geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
ylab("Total deaths from COVID-19") +
geom_text(aes(y=max(total_deaths)+7500,
label=total_deaths_formated,
fontface="bold"),
color="black")+
labs(title="Top 20 of countries with total deaths from COVID-19",
subtitle = todayformatted,
caption = plotscaption)+
scale_y_continuous(breaks=c(10000, 25000, 50000, 75000, 100000, 150000, 170000),
label=c("10k", "25k", "50k", "75k", "100k", "150k", "170k"))+
coord_flip() +
xlab("Countries") +
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ranking.total_deaths_per_million <- covid.total.df[order(-covid.total.df$total_deaths_per_million),]
ranking.total_deaths_per_million$position <- 1:nrow(ranking.total_deaths_per_million)
columnfilter <- c("position", "location", "total_deaths_per_million")
ranking.total_deaths_per_million <- ranking.total_deaths_per_million[, columnfilter]
ranking.total_deaths_per_million <- na.omit(ranking.total_deaths_per_million)
bottom20.total_deaths_per_million <- tail(ranking.total_deaths_per_million, 20)
top20.total_deaths_per_million <- head(ranking.total_deaths_per_million, 20)
mexrow <- which(ranking.total_deaths_per_million$location=='Mexico')
rm(ranking.total_deaths_per_million)
rownames(top20.total_deaths_per_million) <- c()
rownames(bottom20.total_deaths_per_million) <- c()
top20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
top20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
bottom20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
bottom20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
tablecolnames <- c("Position", "Country", "Deaths pmi")
kable(top20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames,
caption="Countries with most deaths from COVID-19 per million inhabitants (pmi)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "float_left") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
kable(bottom20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr",
col.names=tablecolnames,
caption="Countries with least deaths from COVID-19 per million inhabitants (pmi)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "right")
Position | Country | Deaths pmi |
---|---|---|
1 | Belgium | 1,448.37 |
2 | Peru | 1,090.81 |
3 | Spain | 973.40 |
4 | Italy | 932.17 |
5 | United Kingdom | 871.28 |
6 | Argentina | 861.32 |
7 | Macedonia | 860.14 |
8 | Bosnia and Herzegovina | 831.20 |
9 | Mexico | 828.07 |
10 | Brazil | 817.73 |
11 | United States | 817.64 |
12 | France | 809.23 |
13 | Chile | 807.17 |
14 | Czech Republic | 785.04 |
15 | Bolivia | 767.84 |
16 | Ecuador | 765.23 |
17 | Armenia | 740.07 |
18 | Colombia | 725.86 |
19 | Panama | 718.00 |
20 | Slovenia | 716.71 |
Position | Country | Deaths pmi |
---|---|---|
132 | Nigeria | 5.71 |
133 | Sri Lanka | 5.70 |
134 | South Sudan | 5.45 |
135 | New Zealand | 5.18 |
136 | Cote d’Ivoire | 5.00 |
137 | Singapore | 4.96 |
138 | Uganda | 4.48 |
139 | Mozambique | 4.19 |
140 | Rwanda | 3.78 |
141 | Democratic Republic of Congo | 3.74 |
142 | Benin | 3.55 |
143 | China | 3.29 |
144 | Burkina Faso | 3.25 |
145 | Niger | 3.10 |
146 | Thailand | 0.86 |
147 | Papua New Guinea | 0.78 |
148 | Vietnam | 0.36 |
149 | Tanzania | 0.35 |
150 | Taiwan | 0.29 |
151 | Burundi | 0.08 |
ggplot(data=top20.total_deaths_per_million,
aes(x=reorder(paste(position, location),total_deaths_per_million),
y=total_deaths_per_million, fill=location))+
geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
ylab("Deaths from COVID-19 per million inhabitants") +
geom_text(aes(y=max(total_deaths_per_million)+25,
label=total_deaths_per_million_formated,
fontface="bold"),
color="black")+
labs(title="Top 20 countries by deaths from COVID-19 per million inhabitants",
subtitle = todayformatted,
caption = plotscaption)+
coord_flip() +
xlab("Countries") +
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
The following functions are created to facilitate building line graphs.
##Line plot of new_cases with date breaks by month
plot.trend.new_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=new_cases, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "month", date_labels = "%B")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("New cases of COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_cases with date breaks by short month and days
plot.trend.new_cases.monthday <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=new_cases, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "day", date_labels = "%b %d")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("New cases of COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.
top5.total_cases <- c(head(as.character(top20.total_cases$location),5), "Mexico")
sublabel.tmp <- paste("Range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_cases.month(startdate, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_cases.month(startdate.tmp, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_cases.month(startdate.tmp, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_cases.month(startdate.tmp, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_cases.month(startdate.tmp, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.new_cases.monthday(enddate-21, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.new_cases.monthday(enddate-14, enddate, top5.total_cases, "Trends of new cases of COVID-19", sublabel.tmp)
Functions to build the line graphs.
##Line plot of new_deaths with date breaks by month
plot.trend.new_deaths.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=new_deaths, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "month", date_labels = "%B")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("Daily deaths from COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.new_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=new_deaths, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "day", date_labels = "%b %d")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("Daily deaths from COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.
top5.total_cases <- c(head(as.character(top20.total_cases$location),5), "Mexico")
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Trends of daily deaths from COVID-19"
plot.trend.new_deaths.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_deaths.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_deaths.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_deaths.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.new_deaths.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.new_deaths.monthdays(enddate-21, enddate, top5.total_cases, title.tmp, sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.new_deaths.monthdays(enddate-14, enddate, top5.total_cases, title.tmp, sublabel.tmp)
For the following graphs, the variables total_cases and total_deaths are used.
Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.
Functions to build the line graphs.
##Line plot of new_total_cases with date breaks by month
plot.trend.total_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "month", date_labels = "%B")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("Cumulative cases of COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.total_cases.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Países")+
scale_x_date(date_breaks = "day", date_labels = "%b %d")+
ggtitle(graphtitle,
subtitle = graphsubtitle)+
labs(caption = plotscaption)+
ylab("Cumulative cases of COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative cases of COVID-19"
plot.trend.total_cases.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)
The amount of cases of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with most cases are added.
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_cases.others <- as.character(top20.total_cases$location)[3:7]
plot.trend.total_cases.month(startdate.tmp, enddate, total_cases.others, title.tmp, sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-21, enddate, total_cases.others, title.tmp, sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)
total_cases.others <- as.character(top20.total_cases$location)[3:7]
total_cases.others <- total_cases.others[c(-1,-2)]
countrieslabels <- paste(total_cases.others, collapse = ', ')
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
title.tmp <- paste(title.tmp, "of", countrieslabels)
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)
Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.
Functions to build the line graphs.
# Generate total_deaths line plot with month breaks
plot.trend.total_deaths.month <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "month", date_labels = "%B")+
ggtitle(graphtitle,
subtitle = subgraphtitle)+
labs(caption = plotscaption)+
ylab("Cumulative deaths from COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
# Generate total_deaths line plot with month and day breaks
plot.trend.total_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
tmp.df <- covid.df[covid.df$location %in% countries,]
tmp.df <- tmp.df[tmp.df$date >= startdate,]
tmp.df <- tmp.df[tmp.df$date <= enddate,]
ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
geom_line(size=1) +
scale_color_discrete(name = "Countries")+
scale_x_date(date_breaks = "day", date_labels = "%b %d")+
ggtitle(graphtitle,
subtitle = subgraphtitle)+
labs(caption = plotscaption)+
ylab("Cumulative deaths from COVID-19")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
}
top5.total_deaths <- head(as.character(top20.total_deaths$location),5)
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative deaths from COVID-19"
plot.trend.total_deaths.month(startdate, enddate, top5.total_deaths, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)
The amount of deaths of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with the most total deaths are added.
mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_deaths.others <- as.character(top20.total_deaths$location)[3:7]
plot.trend.total_deaths.month(startdate.tmp, enddate, total_deaths.others, title.tmp, sublabel.tmp)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)
The acumulative curves of France, Italy, and the United Kindgdom are almost flat, the number of deaths from COVID-19 has stagnated. One possible reason is that the pandemic started first in Europe and the number of recently reported deaths is zero or very low. The curves for Mexico and India continues to rise. The countries of France, Italy, and the United Kingdom are discarded from the data. Other countries are added where it seems that the number of deaths from COVID-19 is still increasing. The graph is generated with the accumulated death curves of the last 3 weeks of India, Iran, Mexico, Peru and Russia:
total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)
total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-14, enddate, total_deaths.others, title.tmp, sublabel.tmp)
covid.mexico <- covid.df[covid.df$location=='Mexico',]
covid.mexico <- covid.mexico[covid.mexico$date > "2020-04-01",]
covid.mexico.new <- covid.mexico[,c("date", "new_cases", "new_deaths")]
covid.mexico.new.lf <-melt(covid.mexico.new, id.vars = c("date"))
ggplot(data=covid.mexico.new.lf , aes(x=date, y=value, group=variable, colour=variable)) +
geom_line(size=1) +
scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
scale_x_date(date_breaks = "week", date_labels = "%b %d")+
ggtitle("COVID-19 Daily cases and deaths in México")+
labs(caption = plotscaption)+
ylab("")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
covid.mexico.new.lastmonth <- covid.mexico.new[covid.mexico.new$date >= enddate-30, ]
covid.mexico.new.lastmonth.lf <- melt(covid.mexico.new.lastmonth, id.vars = c("date"))
ggplot(data=covid.mexico.new.lastmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
geom_line(size=1) +
scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
scale_x_date(date_breaks = "week", date_labels = "%b %d")+
ggtitle("COVID-19 Daily cases and deaths in México - Last month")+
labs(caption = plotscaption)+
ylab("")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
covid.mexico.total <- covid.mexico[,c("date", "total_cases", "total_deaths")]
covid.mexico.total.lf <-melt(covid.mexico.total, id.vars = c("date"))
ggplot(data=covid.mexico.total.lf , aes(x=date, y=value, group=variable, colour=variable)) +
geom_line(size=1) +
scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
scale_x_date(date_breaks = "week", date_labels = "%b %d")+
scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
ggtitle("COVID-19 Cumulative cases and deaths in México")+
labs(caption = plotscaption)+
ylab("")+
xlab("Date")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
covid.mexico.total.lasttmonth <- covid.mexico.total[covid.mexico.total$date >= enddate-30, ]
covid.mexico.total.lasttmonth.lf <- melt(covid.mexico.total.lasttmonth, id.vars = c("date"))
ggplot(data=covid.mexico.total.lasttmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
geom_line(size=1) +
scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
scale_x_date(date_breaks = "week", date_labels = "%b %d")+
scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
ggtitle("COVID-19 Cumulative cases and deaths in México - Last month")+
labs(caption = plotscaption)+
ylab("")+
xlab("Fecha")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"),
legend.position = "bottom",
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
legend.key.width = unit(1.5,"cm"))+
guides(colour = guide_legend(override.aes = list(size=2)))
covid.mexico <- covid.df[covid.df$location=='Mexico',]
week.agg <- covid.mexico %>%
group_by(week = week(date)) %>%
summarise(mean_new_cases = mean(new_cases),
n = n(),
loCI_new_cases = ci(new_cases)[2],
hiCI_new_cases = ci(new_cases)[3],
mean_new_deaths = mean(new_deaths),
loCI_new_deaths = ci(new_deaths)[2],
hiCI_new_deaths = ci(new_deaths)[3],
total_new_cases = sum(new_cases),
total_new_deaths = sum(new_deaths))
# Weeks with zero mean new cases are deleted
week.agg <- week.agg[which(week.agg$mean_new_cases > 0),]
# Finding the date of the first week to be ploted
firstweek <- min(week.agg$week)
rowindex <- match(firstweek, week(covid.mexico[,"date"]))
firstweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(firstweek, rowindex)
# Finding the date of the last week to be ploted
lastweek <- max(week.agg$week)
rowindex <- match(lastweek, week(covid.mexico[,"date"]))
lastweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(lastweek, rowindex)
sublabel <- paste("Date range:", firstweek.date, "-", lastweek.date)
ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_cases))+
geom_bar(stat = 'identity', aes(fill = mean_new_cases)) +
geom_errorbar(aes(ymin=loCI_new_cases, ymax=hiCI_new_cases), width=.2,
position=position_dodge(.9)) +
xlab("Week")+
ylab("Mean cases per week ± CI 95%")+
labs(title="Mean cases per week of COVID-19 in México",
subtitle = sublabel,
caption = plotscaption)+
scale_fill_gradient(name="Cases", low = "green", high = "red")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_deaths))+
geom_bar(stat = 'identity', aes(fill = mean_new_deaths)) +
geom_errorbar(aes(ymin=loCI_new_deaths, ymax=hiCI_new_deaths), width=.2,
position=position_dodge(.9)) +
xlab("Week")+
ylab("Mean deaths per week ± CI 95%")+
labs(title="Mean deaths per week from COVID-19 in México",
subtitle = sublabel,
caption = plotscaption)+
scale_fill_gradient(name="Deaths", low = "green", high = "red")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ggplot(data=week.agg, aes(x=as.character(week),y=total_new_cases))+
geom_bar(stat = 'identity', aes(fill = total_new_cases)) +
xlab("Week")+
ylab("Total cases per week")+
labs(title="Total COVID-19 cases per week in México",
subtitle = sublabel,
caption = plotscaption)+
scale_fill_gradient(name="Total cases", low = "green", high = "red")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))
ggplot(data=week.agg, aes(x=as.character(week),y=total_new_deaths))+
geom_bar(stat = 'identity', aes(fill = total_new_deaths)) +
xlab("Week")+
ylab("Total deaths per week")+
labs(title="Total COVID-19 deaths per week in México",
subtitle=sublabel,
caption = plotscaption)+
scale_fill_gradient(name="Total deaths", low = "green", high = "red")+
theme_bw()+
theme(title = element_text(size=14, face="bold", colour = "black"),
axis.text.y = element_text(size=11, face="bold", colour = "black"),
axis.text.x = element_text(size=11, face="bold", colour = "black"))