Description

This is an exploratory data analysis for educational purposes. The analysis is carried out with data related to COVID-19 reported by various countries. The data is obtained from the Website: https://ourworldindata.org/coronavirus-source-data. The site’s data set is constantly updated. The results of the analysis changes according to the day the code is executed.

Data reading

The data set is loaded directly from the Website to obtain information updated according to the execution day of the code.

covid.df <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

Required packages are loaded.

library(ggplot2)
library(knitr)
library(reshape2)
library(kableExtra)
#library(dplyr)
library(tidyverse)
library(lubridate)
library(gmodels)
plotscaption <- "oscarcastrolopez.github.io"

Display of the dataframe structure with its variables and their type.

str(covid.df)
## 'data.frame':    60098 obs. of  50 variables:
##  $ iso_code                          : Factor w/ 192 levels "","AFG","AGO",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ continent                         : Factor w/ 7 levels "","Africa","Asia",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ location                          : Factor w/ 192 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date                              : Factor w/ 336 levels "2020-01-01","2020-01-02",..: 23 24 25 26 27 28 29 30 31 32 ...
##  $ total_cases                       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_cases                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_cases_smoothed                : num  NA NA NA NA NA 0 0 0 0 0 ...
##  $ total_deaths                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_deaths                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_deaths_smoothed               : num  NA NA NA NA NA 0 0 0 0 0 ...
##  $ total_cases_per_million           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_cases_per_million             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_cases_smoothed_per_million    : num  NA NA NA NA NA 0 0 0 0 0 ...
##  $ total_deaths_per_million          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_deaths_per_million            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_deaths_smoothed_per_million   : num  NA NA NA NA NA 0 0 0 0 0 ...
##  $ reproduction_rate                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ icu_patients                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ icu_patients_per_million          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hosp_patients                     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hosp_patients_per_million         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ weekly_icu_admissions             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ weekly_icu_admissions_per_million : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ weekly_hosp_admissions            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ weekly_hosp_admissions_per_million: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests                       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests                         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests_per_thousand          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_per_thousand            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed_per_thousand   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ positive_rate                     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_per_case                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_units                       : Factor w/ 5 levels "","people tested",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stringency_index                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ population                        : num  38928341 38928341 38928341 38928341 38928341 ...
##  $ population_density                : num  54.4 54.4 54.4 54.4 54.4 ...
##  $ median_age                        : num  18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 ...
##  $ aged_65_older                     : num  2.58 2.58 2.58 2.58 2.58 ...
##  $ aged_70_older                     : num  1.34 1.34 1.34 1.34 1.34 ...
##  $ gdp_per_capita                    : num  1804 1804 1804 1804 1804 ...
##  $ extreme_poverty                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ cardiovasc_death_rate             : num  597 597 597 597 597 ...
##  $ diabetes_prevalence               : num  9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 ...
##  $ female_smokers                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ male_smokers                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ handwashing_facilities            : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ hospital_beds_per_thousand        : num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
##  $ life_expectancy                   : num  64.8 64.8 64.8 64.8 64.8 ...
##  $ human_development_index           : num  0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 0.498 ...

A detailed description of each variable of the data set can be found on the following Website: https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-codebook.csv.

The variable date is class factor is converted to class date.

covid.df$date <- as.Date(covid.df$date, format("%Y-%m-%d"))

List of countries of the data set.

levels(covid.df$location)
##   [1] "Afghanistan"                      "Albania"                         
##   [3] "Algeria"                          "Andorra"                         
##   [5] "Angola"                           "Antigua and Barbuda"             
##   [7] "Argentina"                        "Armenia"                         
##   [9] "Australia"                        "Austria"                         
##  [11] "Azerbaijan"                       "Bahamas"                         
##  [13] "Bahrain"                          "Bangladesh"                      
##  [15] "Barbados"                         "Belarus"                         
##  [17] "Belgium"                          "Belize"                          
##  [19] "Benin"                            "Bhutan"                          
##  [21] "Bolivia"                          "Bosnia and Herzegovina"          
##  [23] "Botswana"                         "Brazil"                          
##  [25] "Brunei"                           "Bulgaria"                        
##  [27] "Burkina Faso"                     "Burundi"                         
##  [29] "Cambodia"                         "Cameroon"                        
##  [31] "Canada"                           "Cape Verde"                      
##  [33] "Central African Republic"         "Chad"                            
##  [35] "Chile"                            "China"                           
##  [37] "Colombia"                         "Comoros"                         
##  [39] "Congo"                            "Costa Rica"                      
##  [41] "Cote d'Ivoire"                    "Croatia"                         
##  [43] "Cuba"                             "Cyprus"                          
##  [45] "Czech Republic"                   "Democratic Republic of Congo"    
##  [47] "Denmark"                          "Djibouti"                        
##  [49] "Dominica"                         "Dominican Republic"              
##  [51] "Ecuador"                          "Egypt"                           
##  [53] "El Salvador"                      "Equatorial Guinea"               
##  [55] "Eritrea"                          "Estonia"                         
##  [57] "Ethiopia"                         "Fiji"                            
##  [59] "Finland"                          "France"                          
##  [61] "Gabon"                            "Gambia"                          
##  [63] "Georgia"                          "Germany"                         
##  [65] "Ghana"                            "Greece"                          
##  [67] "Grenada"                          "Guatemala"                       
##  [69] "Guinea"                           "Guinea-Bissau"                   
##  [71] "Guyana"                           "Haiti"                           
##  [73] "Honduras"                         "Hong Kong"                       
##  [75] "Hungary"                          "Iceland"                         
##  [77] "India"                            "Indonesia"                       
##  [79] "International"                    "Iran"                            
##  [81] "Iraq"                             "Ireland"                         
##  [83] "Israel"                           "Italy"                           
##  [85] "Jamaica"                          "Japan"                           
##  [87] "Jordan"                           "Kazakhstan"                      
##  [89] "Kenya"                            "Kosovo"                          
##  [91] "Kuwait"                           "Kyrgyzstan"                      
##  [93] "Laos"                             "Latvia"                          
##  [95] "Lebanon"                          "Lesotho"                         
##  [97] "Liberia"                          "Libya"                           
##  [99] "Liechtenstein"                    "Lithuania"                       
## [101] "Luxembourg"                       "Macedonia"                       
## [103] "Madagascar"                       "Malawi"                          
## [105] "Malaysia"                         "Maldives"                        
## [107] "Mali"                             "Malta"                           
## [109] "Marshall Islands"                 "Mauritania"                      
## [111] "Mauritius"                        "Mexico"                          
## [113] "Moldova"                          "Monaco"                          
## [115] "Mongolia"                         "Montenegro"                      
## [117] "Morocco"                          "Mozambique"                      
## [119] "Myanmar"                          "Namibia"                         
## [121] "Nepal"                            "Netherlands"                     
## [123] "New Zealand"                      "Nicaragua"                       
## [125] "Niger"                            "Nigeria"                         
## [127] "Norway"                           "Oman"                            
## [129] "Pakistan"                         "Palestine"                       
## [131] "Panama"                           "Papua New Guinea"                
## [133] "Paraguay"                         "Peru"                            
## [135] "Philippines"                      "Poland"                          
## [137] "Portugal"                         "Qatar"                           
## [139] "Romania"                          "Russia"                          
## [141] "Rwanda"                           "Saint Kitts and Nevis"           
## [143] "Saint Lucia"                      "Saint Vincent and the Grenadines"
## [145] "San Marino"                       "Sao Tome and Principe"           
## [147] "Saudi Arabia"                     "Senegal"                         
## [149] "Serbia"                           "Seychelles"                      
## [151] "Sierra Leone"                     "Singapore"                       
## [153] "Slovakia"                         "Slovenia"                        
## [155] "Solomon Islands"                  "Somalia"                         
## [157] "South Africa"                     "South Korea"                     
## [159] "South Sudan"                      "Spain"                           
## [161] "Sri Lanka"                        "Sudan"                           
## [163] "Suriname"                         "Swaziland"                       
## [165] "Sweden"                           "Switzerland"                     
## [167] "Syria"                            "Taiwan"                          
## [169] "Tajikistan"                       "Tanzania"                        
## [171] "Thailand"                         "Timor"                           
## [173] "Togo"                             "Trinidad and Tobago"             
## [175] "Tunisia"                          "Turkey"                          
## [177] "Uganda"                           "Ukraine"                         
## [179] "United Arab Emirates"             "United Kingdom"                  
## [181] "United States"                    "Uruguay"                         
## [183] "Uzbekistan"                       "Vanuatu"                         
## [185] "Vatican"                          "Venezuela"                       
## [187] "Vietnam"                          "Western Sahara"                  
## [189] "World"                            "Yemen"                           
## [191] "Zambia"                           "Zimbabwe"

List of continents of the data set.

levels(covid.df$continent)
## [1] ""              "Africa"        "Asia"          "Europe"       
## [5] "North America" "Oceania"       "South America"

Máximum number of days that the data set comprises.

maxdays <- max(table(covid.df$location))
print(paste("Maximum reported days:",maxdays))
## [1] "Maximum reported days: 336"

Countries reporting data of the maximum days of the data set.

names(table(covid.df$location)[table(covid.df$location)==maxdays])
## [1] "Mexico"

Start and end dates of the data set.

startdate <- min(covid.df$date)
enddate <- max(covid.df$date)
todayformatted <- format(Sys.Date(), "%A, %B %d, %Y")
todayformatted <- paste(toupper(substr(todayformatted, 1, 1)), 
                        substr(todayformatted, 2, nchar(todayformatted)), 
                        sep="")
print(paste("Starts:", format(startdate, "%A, %B, %d of %Y"), 
            "Ends:",  format(enddate, "%A, %B, %d of %Y")))
## [1] "Starts: Wednesday, January, 01 of 2020 Ends: Tuesday, December, 01 of 2020"

Rankings of countries with the variables of totals

From the original dataframe, only the rows where date is equal to the maximum date (or compilation date) are obtained. Using this filter only the updated totals or accumulated values of each country are obtained. Additionally, countries with less than a million inhabitants are discarded. Some countries with small population have very high statistics per million of inhabitants.

The rankings are created with the following variables:

# Get one row of each country with the updated totals
covid.total.df <- covid.df[covid.df$date== enddate,]
# Filter countries with less than 1 millón population
covid.total.df <- covid.total.df[covid.total.df$population >= 1000000,]
covid.total.df$location <- droplevels.factor(covid.total.df$location)
print(names(covid.total.df)[c(5,7,9,11)])
## [1] "total_cases"             "new_cases_smoothed"     
## [3] "new_deaths"              "total_cases_per_million"

After discarding the countries that have less than a million inhabitants, the data set now has: 156 countries.

For each of the variables with total values, the 20 countries with the highest and lowest values are obtained. If México is not included in the top or bottom 20 it is added to the list indicating its ranking position. For each variable to be analyzed, the following is done in R:

  1. The data is ordered by the corresponding variable to be analyzed and stored in a new dataframe.
  2. In the new dataframe the information of the 20 countries with the highest values and the 20 countries with the lowest values are kept, the rest is discarded (row filtering).
  3. In the new dataframe only variables of interest are kept, the rest is discarded (column filtering). An additional column is created with the numeric ranking of each country.
  4. The data of the rankings is displayed as tables and bar graphs.

Ranking of countries by total cases of COVID-19 with column total_cases

# Data is ordered according to the total_cases column
ranking.total_cases <- covid.total.df[order(-covid.total.df$total_cases),]
# The row corresponding to World is removed
ranking.total_cases <- ranking.total_cases[ranking.total_cases$location != "World", ]
# A new column indicating the positionin the rank is added
ranking.total_cases$position <- 1:nrow(ranking.total_cases)
# Only columns of interest are kept
columnfilter <- c("position", "location", "total_cases")
bottom20.total_cases <- tail(ranking.total_cases[, columnfilter],20)
top20.total_cases <- head(ranking.total_cases[, columnfilter],20)
rm(ranking.total_cases)
rownames(top20.total_cases) <- c()
rownames(bottom20.total_cases) <- c()

mexrow <- which(top20.total_cases$location=='Mexico')
top20.total_cases$total_cases_formated <- formatC(top20.total_cases$total_cases, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_cases$total_cases_formated <- formatC(bottom20.total_cases$total_cases, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases
Position Country Total cases
1 United States 13,721,304
2 India 9,462,809
3 Brazil 6,386,787
4 Russia 2,302,062
5 France 2,275,429
6 Spain 1,656,444
7 United Kingdom 1,647,230
8 Italy 1,620,901
9 Argentina 1,432,570
10 Colombia 1,324,792
11 Mexico 1,122,362
12 Germany 1,094,678
13 Poland 999,924
14 Iran 975,951
15 Peru 963,605
16 South Africa 792,299
17 Ukraine 765,117
18 Turkey 668,957
19 Belgium 579,212
20 Iraq 554,767
Countries with least cases
Position Country Total cases
137 Guinea-Bissau 2,441
138 Sierra Leone 2,413
139 Yemen 2,197
140 Lesotho 2,137
141 New Zealand 2,060
142 Chad 1,700
143 Liberia 1,595
144 Niger 1,586
145 Vietnam 1,351
146 Mongolia 812
147 Burundi 689
148 Taiwan 679
149 Papua New Guinea 669
150 Eritrea 577
151 Tanzania 509
152 Mauritius 505
153 Cambodia 329
154 Laos 39
155 Timor 30
156 NA NA

Top 20 graph

ggplot(data=top20.total_cases, aes(x=reorder(paste(position, location),total_cases), 
                                   y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=total_cases_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 countries with most cases of  COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 with column total_cases - American countries

This ranking is done with American countries only. The data is obtained by filtering the rows where the continent variable is either “North America” or “South America”.

ranking.total_cases.america <- covid.total.df[covid.total.df$continent %in% 
                                                c("North America", "South America") ,]
ranking.total_cases.america <- ranking.total_cases.america[order(-ranking.total_cases.america$total_cases),]
ranking.total_cases.america$position <- 1:nrow(ranking.total_cases.america)
bottom20.total_cases.america <- tail(ranking.total_cases.america[, c("position", "location", "total_cases")],5)
top20.total_cases.america <- head(ranking.total_cases.america[, c("position", "location", "total_cases")],20)
rownames(bottom20.total_cases.america) <- c()
rownames(top20.total_cases.america) <- c()
mexrow <- which(ranking.total_cases.america$location=='Mexico')
rm(ranking.total_cases.america)
top20.total_cases.america$total_cases_formated <- formatC(top20.total_cases.america$total_cases,
                                                          format="f", big.mark=",", digits=0)
bottom20.total_cases.america$total_cases_formated <- formatC(bottom20.total_cases.america$total_cases, 
                                                             format="f", big.mark=",", digits=0)

Table

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases.america[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries of the American Continent with most cases") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
Countries of the American Continent with most cases
Position Country Total cases
1 United States 13,721,304
2 Brazil 6,386,787
3 Argentina 1,432,570
4 Colombia 1,324,792
5 Mexico 1,122,362
6 Peru 963,605
7 Chile 552,864
8 Canada 387,052
9 Ecuador 193,673
10 Panama 167,311
11 Bolivia 144,810
12 Dominican Republic 144,302
13 Costa Rica 140,172
14 Guatemala 122,774
15 Honduras 108,253
16 Venezuela 102,621
17 Paraguay 83,479
18 El Salvador 39,130
19 Jamaica 10,810
20 Haiti 9,296

Top 20 graph

ggplot(data=top20.total_cases.america, aes(x=reorder(paste(position, location),total_cases),
                                           y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=format(total_cases, big.mark=","),
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of American countries with most cases of COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 250000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "250k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 per million inhabitants with column total_cases_per_million

ranking.total_cases_per_million <- covid.total.df[order(-covid.total.df$total_cases_per_million),]
ranking.total_cases_per_million$position <- 1:nrow(ranking.total_cases_per_million)
columnfilter <- c("position", "location", "total_cases_per_million")
ranking.total_cases_per_million <- ranking.total_cases_per_million[, columnfilter]

ranking.total_cases_per_million <- na.omit(ranking.total_cases_per_million)
mexico.total_cases_per_million <- ranking.total_cases_per_million[
                                  ranking.total_cases_per_million$location == "Mexico", ]


bottom20.total_cases_per_million <- tail(ranking.total_cases_per_million,20)
top20.total_cases_per_million <- head(ranking.total_cases_per_million,20)
mexico.total_cases_per_million <- mexico.total_cases_per_million[, columnfilter]
rm(ranking.total_cases_per_million)
rownames(top20.total_cases_per_million) <- c()
rownames(bottom20.total_cases_per_million) <- c()
rownames(mexico.total_cases_per_million) <- c()

top20.total_cases_per_million <- rbind(top20.total_cases_per_million, mexico.total_cases_per_million)
mexrow <- which(top20.total_cases_per_million$location=='Mexico')

top20.total_cases_per_million$total_cases_per_million_formated <- formatC(
  top20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
bottom20.total_cases_per_million$total_cases_per_million_formated<- formatC(
  bottom20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)

Tables

tablecolnames <- c("Position", "country", "Cases pmi")
kable(top20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases per million inhabitants (pmi)
Position country Cases pmi
1 Bahrain 51,209.37
2 Belgium 49,976.81
3 Czech Republic 49,348.67
4 Qatar 48,246.48
5 Armenia 45,884.67
6 United States 41,453.76
7 Israel 39,064.80
8 Panama 38,776.36
9 Switzerland 38,230.92
10 Slovenia 37,103.19
11 Spain 35,428.33
12 Georgia 34,930.28
13 France 34,859.91
14 Kuwait 33,483.17
15 Croatia 31,993.53
16 Austria 31,698.46
17 Argentina 31,696.99
18 Netherlands 31,288.76
19 Macedonia 30,212.92
20 Brazil 30,047.07
70 Mexico 8,705.02
Countries with least cases per million inhabitants (pmi)
Position country Cases pmi
137 South Sudan 277.92
138 Benin 248.70
139 Mongolia 247.69
140 Mali 235.15
141 Eritrea 162.70
142 Democratic Republic of Congo 143.58
143 Burkina Faso 140.22
144 Chad 103.50
145 Papua New Guinea 74.77
146 Yemen 73.66
147 Niger 65.52
148 China 64.61
149 Burundi 57.94
150 Thailand 57.68
151 Taiwan 28.51
152 Timor 22.75
153 Cambodia 19.68
154 Vietnam 13.88
155 Tanzania 8.52
156 Laos 5.36

Top 20 graph

ggplot(data=top20.total_cases_per_million, 
       aes(x=reorder(paste(position, location),total_cases_per_million), 
           y=total_cases_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_cases_per_million)+1250, 
                label=total_cases_per_million_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with most cases of COVID-19 per million inhabitants (+ México)",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 with the variable total_deaths

ranking.total_deaths <- covid.total.df[order(-covid.total.df$total_deaths),]
ranking.total_deaths <- ranking.total_deaths[ranking.total_deaths$location != "World", ]
ranking.total_deaths$position <- 1:nrow(ranking.total_deaths)
columnfilter <- c("position", "location", "total_deaths")
ranking.total_deaths <- ranking.total_deaths[, columnfilter]
ranking.total_deaths <- na.omit(ranking.total_deaths)
bottom20.total_deaths <- tail(ranking.total_deaths,20)
top20.total_deaths <- head(ranking.total_deaths,20)
mexrow <- which(ranking.total_deaths$location=='Mexico')
rm(ranking.total_deaths)
rownames(top20.total_deaths) <- c()
rownames(bottom20.total_deaths) <- c()

top20.total_deaths$total_deaths_formated <- formatC(top20.total_deaths$total_deaths, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_deaths$total_deaths_formated <- formatC(bottom20.total_deaths$total_deaths, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total deaths")
kable(top20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19
Position Country Total deaths
1 United States 270,642
2 Brazil 173,817
3 India 137,621
4 Mexico 106,765
5 United Kingdom 59,148
6 Italy 56,361
7 France 52,821
8 Iran 48,628
9 Spain 45,511
10 Russia 40,050
11 Argentina 38,928
12 Colombia 36,934
13 Peru 35,966
14 South Africa 21,644
15 Poland 17,599
16 Germany 17,177
17 Indonesia 17,081
18 Belgium 16,786
19 Chile 15,430
20 Turkey 13,936
Countries with least deaths from COVID-19
Position Country Total deaths
131 Sierra Leone 74
132 Burkina Faso 68
133 Togo 64
134 Central African Republic 63
135 South Sudan 61
136 Gabon 60
137 Thailand 60
138 Rwanda 49
139 Guinea-Bissau 44
140 Lesotho 44
141 Benin 43
142 Vietnam 35
143 Botswana 34
144 Singapore 29
145 New Zealand 25
146 Tanzania 21
147 Mauritius 10
148 Papua New Guinea 7
149 Taiwan 7
150 Burundi 1

Top 20 graph

ggplot(data=top20.total_deaths, 
       aes(x=reorder(paste(position, location),total_deaths), 
                                   y=total_deaths, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total deaths from COVID-19") +
  geom_text(aes(y=max(total_deaths)+7500, 
                label=total_deaths_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with total deaths from COVID-19",
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(10000, 25000, 50000, 75000, 100000, 150000, 170000), 
                     label=c("10k", "25k", "50k", "75k", "100k", "150k", "170k"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 per million inhabitants using the variable total_deaths_per_million

ranking.total_deaths_per_million <- covid.total.df[order(-covid.total.df$total_deaths_per_million),]
ranking.total_deaths_per_million$position <- 1:nrow(ranking.total_deaths_per_million)
columnfilter <- c("position", "location", "total_deaths_per_million")
ranking.total_deaths_per_million <- ranking.total_deaths_per_million[, columnfilter]
ranking.total_deaths_per_million <- na.omit(ranking.total_deaths_per_million)
bottom20.total_deaths_per_million <- tail(ranking.total_deaths_per_million, 20)
top20.total_deaths_per_million <- head(ranking.total_deaths_per_million, 20)
mexrow <- which(ranking.total_deaths_per_million$location=='Mexico')
rm(ranking.total_deaths_per_million)
rownames(top20.total_deaths_per_million) <- c()
rownames(bottom20.total_deaths_per_million) <- c()

top20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  top20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
bottom20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  bottom20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)

Table

tablecolnames <- c("Position", "Country", "Deaths pmi")

kable(top20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with most deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with least deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
1 Belgium 1,448.37
2 Peru 1,090.81
3 Spain 973.40
4 Italy 932.17
5 United Kingdom 871.28
6 Argentina 861.32
7 Macedonia 860.14
8 Bosnia and Herzegovina 831.20
9 Mexico 828.07
10 Brazil 817.73
11 United States 817.64
12 France 809.23
13 Chile 807.17
14 Czech Republic 785.04
15 Bolivia 767.84
16 Ecuador 765.23
17 Armenia 740.07
18 Colombia 725.86
19 Panama 718.00
20 Slovenia 716.71
Countries with least deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
132 Nigeria 5.71
133 Sri Lanka 5.70
134 South Sudan 5.45
135 New Zealand 5.18
136 Cote d’Ivoire 5.00
137 Singapore 4.96
138 Uganda 4.48
139 Mozambique 4.19
140 Rwanda 3.78
141 Democratic Republic of Congo 3.74
142 Benin 3.55
143 China 3.29
144 Burkina Faso 3.25
145 Niger 3.10
146 Thailand 0.86
147 Papua New Guinea 0.78
148 Vietnam 0.36
149 Tanzania 0.35
150 Taiwan 0.29
151 Burundi 0.08

Top 20 graph

ggplot(data=top20.total_deaths_per_million, 
       aes(x=reorder(paste(position, location),total_deaths_per_million), 
           y=total_deaths_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Deaths from COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_deaths_per_million)+25, 
                label=total_deaths_per_million_formated,
                fontface="bold"),
            color="black")+
  labs(title="Top 20 countries by deaths from COVID-19 per million inhabitants",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Cumulative COVID-19 Cases and Deaths

For the following graphs, the variables total_cases and total_deaths are used.

Curves of total cases of the five countries with most cases (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.

Functions to build the line graphs.

##Line plot of new_total_cases with date breaks by month
plot.trend.total_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.total_cases.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Países")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative cases of COVID-19"
plot.trend.total_cases.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of cases of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with most cases are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_cases.others <- as.character(top20.total_cases$location)[3:7]
plot.trend.total_cases.month(startdate.tmp, enddate, total_cases.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-21, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks MX, PE y SA

total_cases.others <- as.character(top20.total_cases$location)[3:7]
total_cases.others <- total_cases.others[c(-1,-2)]
countrieslabels <- paste(total_cases.others, collapse = ', ')
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
title.tmp <- paste(title.tmp, "of", countrieslabels)
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

Curves of daily deaths of the five countries with most deaths from COVID-19 (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Wednesday, December 02, 2020.

Functions to build the line graphs.

# Generate total_deaths line plot with month breaks
plot.trend.total_deaths.month <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
# Generate total_deaths line plot with month and day breaks
plot.trend.total_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

top5.total_deaths <-  head(as.character(top20.total_deaths$location),5)
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative deaths from COVID-19"
plot.trend.total_deaths.month(startdate, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of deaths of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with the most total deaths are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_deaths.others <- as.character(top20.total_deaths$location)[3:7]
plot.trend.total_deaths.month(startdate.tmp, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks IN, IR, MX, PE, RU

The acumulative curves of France, Italy, and the United Kindgdom are almost flat, the number of deaths from COVID-19 has stagnated. One possible reason is that the pandemic started first in Europe and the number of recently reported deaths is zero or very low. The curves for Mexico and India continues to rise. The countries of France, Italy, and the United Kingdom are discarded from the data. Other countries are added where it seems that the number of deaths from COVID-19 is still increasing. The graph is generated with the accumulated death curves of the last 3 weeks of India, Iran, Mexico, Peru and Russia:

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-2 Weeks IN, IR, MX, PE, RU

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-14, enddate, total_deaths.others, title.tmp, sublabel.tmp)

Comparison of COVID-19 cases and deaths curves in Mexico

Daily cases and deaths

covid.mexico <- covid.df[covid.df$location=='Mexico',]
covid.mexico <- covid.mexico[covid.mexico$date > "2020-04-01",]
covid.mexico.new <- covid.mexico[,c("date", "new_cases", "new_deaths")]
covid.mexico.new.lf <-melt(covid.mexico.new, id.vars = c("date"))
ggplot(data=covid.mexico.new.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Daily cases and deaths last month

covid.mexico.new.lastmonth <- covid.mexico.new[covid.mexico.new$date >= enddate-30, ]
covid.mexico.new.lastmonth.lf <- melt(covid.mexico.new.lastmonth, id.vars = c("date"))
ggplot(data=covid.mexico.new.lastmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths

covid.mexico.total <- covid.mexico[,c("date", "total_cases", "total_deaths")]
covid.mexico.total.lf <-melt(covid.mexico.total, id.vars = c("date"))
ggplot(data=covid.mexico.total.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths last month

covid.mexico.total.lasttmonth <- covid.mexico.total[covid.mexico.total$date >= enddate-30, ]
covid.mexico.total.lasttmonth.lf <- melt(covid.mexico.total.lasttmonth, id.vars = c("date"))
ggplot(data=covid.mexico.total.lasttmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Fecha")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

COVID-19 cases and deaths in México per week

covid.mexico <- covid.df[covid.df$location=='Mexico',]
week.agg <- covid.mexico %>%
  group_by(week = week(date)) %>%
  summarise(mean_new_cases = mean(new_cases),
            n = n(), 
            loCI_new_cases = ci(new_cases)[2], 
            hiCI_new_cases = ci(new_cases)[3], 
            mean_new_deaths = mean(new_deaths), 
            loCI_new_deaths = ci(new_deaths)[2], 
            hiCI_new_deaths = ci(new_deaths)[3], 
            total_new_cases = sum(new_cases), 
            total_new_deaths = sum(new_deaths))
# Weeks with zero mean new cases are deleted
week.agg <- week.agg[which(week.agg$mean_new_cases > 0),]
# Finding the date of the first week to be ploted
firstweek <- min(week.agg$week)
rowindex <- match(firstweek, week(covid.mexico[,"date"]))
firstweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(firstweek, rowindex)
# Finding the date of the last week to be ploted
lastweek <- max(week.agg$week)
rowindex <- match(lastweek, week(covid.mexico[,"date"]))
lastweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(lastweek, rowindex)
sublabel <- paste("Date range:", firstweek.date, "-", lastweek.date)

Mean cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_cases))+
  geom_bar(stat = 'identity', aes(fill = mean_new_cases)) +
  geom_errorbar(aes(ymin=loCI_new_cases, ymax=hiCI_new_cases), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean cases per week ± CI 95%")+
  labs(title="Mean cases per week of COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Mean deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = mean_new_deaths)) +
  geom_errorbar(aes(ymin=loCI_new_deaths, ymax=hiCI_new_deaths), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean deaths per week ± CI 95%")+
  labs(title="Mean deaths per week from COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_cases))+
  geom_bar(stat = 'identity', aes(fill = total_new_cases)) +
  xlab("Week")+
  ylab("Total cases per week")+
  labs(title="Total COVID-19 cases per week in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = total_new_deaths)) +
  xlab("Week")+
  ylab("Total deaths per week")+
  labs(title="Total COVID-19 deaths per week in México",
       subtitle=sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))