Description

This is an exploratory data analysis for educational purposes. The analysis is carried out with data related to COVID-19 reported by various countries. The data is obtained from the Website: https://ourworldindata.org/coronavirus-source-data. The site’s data set is constantly updated. The results of the analysis changes according to the day the code is executed.

Data reading

The data set is loaded directly from the Website to obtain information updated according to the execution day of the code.

covid.df <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

Required packages are loaded.

library(ggplot2)
library(knitr)
library(reshape2)
library(kableExtra)
#library(dplyr)
library(tidyverse)
library(lubridate)
library(gmodels)
plotscaption <- "oscarcastrolopez.github.io"

Display of the dataframe structure with its variables and their type.

str(covid.df)
## 'data.frame':    38638 obs. of  40 variables:
##  $ iso_code                       : Factor w/ 212 levels "","ABW","AFG",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ continent                      : Factor w/ 7 levels "","Africa","Asia",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ location                       : Factor w/ 212 levels "Afghanistan",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ date                           : Factor w/ 235 levels "2019-12-31","2020-01-01",..: 74 80 81 82 83 84 85 86 87 88 ...
##  $ total_cases                    : num  2 NA 4 NA NA NA 12 17 19 28 ...
##  $ new_cases                      : num  2 NA 2 NA NA NA 8 5 2 9 ...
##  $ new_cases_smoothed             : num  NA 0.286 0.286 0.286 0.286 ...
##  $ total_deaths                   : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths                     : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_smoothed            : num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ total_cases_per_million        : num  18.7 NA 37.5 NA NA ...
##  $ new_cases_per_million          : num  18.7 NA 18.7 NA NA ...
##  $ new_cases_smoothed_per_million : num  NA 2.68 2.68 2.68 2.68 ...
##  $ total_deaths_per_million       : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_per_million         : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_smoothed_per_million: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ new_tests                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests_per_thousand       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_per_thousand         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed_per_thousand: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_per_case                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ positive_rate                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_units                    : Factor w/ 7 levels "","people tested",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stringency_index               : num  0 33.3 33.3 44.4 44.4 ...
##  $ population                     : num  106766 106766 106766 106766 106766 ...
##  $ population_density             : num  585 585 585 585 585 ...
##  $ median_age                     : num  41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 ...
##  $ aged_65_older                  : num  13.1 13.1 13.1 13.1 13.1 ...
##  $ aged_70_older                  : num  7.45 7.45 7.45 7.45 7.45 ...
##  $ gdp_per_capita                 : num  35974 35974 35974 35974 35974 ...
##  $ extreme_poverty                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ cardiovasc_death_rate          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ diabetes_prevalence            : num  11.6 11.6 11.6 11.6 11.6 ...
##  $ female_smokers                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ male_smokers                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ handwashing_facilities         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hospital_beds_per_thousand     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ life_expectancy                : num  76.3 76.3 76.3 76.3 76.3 ...

A detailed description of each variable of the data set can be found on the following Website: https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-codebook.csv.

The variable date is class factor is converted to class date.

covid.df$date <- as.Date(covid.df$date, format("%Y-%m-%d"))

List of countries of the data set.

levels(covid.df$location)
##   [1] "Afghanistan"                      "Albania"                         
##   [3] "Algeria"                          "Andorra"                         
##   [5] "Angola"                           "Anguilla"                        
##   [7] "Antigua and Barbuda"              "Argentina"                       
##   [9] "Armenia"                          "Aruba"                           
##  [11] "Australia"                        "Austria"                         
##  [13] "Azerbaijan"                       "Bahamas"                         
##  [15] "Bahrain"                          "Bangladesh"                      
##  [17] "Barbados"                         "Belarus"                         
##  [19] "Belgium"                          "Belize"                          
##  [21] "Benin"                            "Bermuda"                         
##  [23] "Bhutan"                           "Bolivia"                         
##  [25] "Bonaire Sint Eustatius and Saba"  "Bosnia and Herzegovina"          
##  [27] "Botswana"                         "Brazil"                          
##  [29] "British Virgin Islands"           "Brunei"                          
##  [31] "Bulgaria"                         "Burkina Faso"                    
##  [33] "Burundi"                          "Cambodia"                        
##  [35] "Cameroon"                         "Canada"                          
##  [37] "Cape Verde"                       "Cayman Islands"                  
##  [39] "Central African Republic"         "Chad"                            
##  [41] "Chile"                            "China"                           
##  [43] "Colombia"                         "Comoros"                         
##  [45] "Congo"                            "Costa Rica"                      
##  [47] "Cote d'Ivoire"                    "Croatia"                         
##  [49] "Cuba"                             "Curacao"                         
##  [51] "Cyprus"                           "Czech Republic"                  
##  [53] "Democratic Republic of Congo"     "Denmark"                         
##  [55] "Djibouti"                         "Dominica"                        
##  [57] "Dominican Republic"               "Ecuador"                         
##  [59] "Egypt"                            "El Salvador"                     
##  [61] "Equatorial Guinea"                "Eritrea"                         
##  [63] "Estonia"                          "Ethiopia"                        
##  [65] "Faeroe Islands"                   "Falkland Islands"                
##  [67] "Fiji"                             "Finland"                         
##  [69] "France"                           "French Polynesia"                
##  [71] "Gabon"                            "Gambia"                          
##  [73] "Georgia"                          "Germany"                         
##  [75] "Ghana"                            "Gibraltar"                       
##  [77] "Greece"                           "Greenland"                       
##  [79] "Grenada"                          "Guam"                            
##  [81] "Guatemala"                        "Guernsey"                        
##  [83] "Guinea"                           "Guinea-Bissau"                   
##  [85] "Guyana"                           "Haiti"                           
##  [87] "Honduras"                         "Hong Kong"                       
##  [89] "Hungary"                          "Iceland"                         
##  [91] "India"                            "Indonesia"                       
##  [93] "International"                    "Iran"                            
##  [95] "Iraq"                             "Ireland"                         
##  [97] "Isle of Man"                      "Israel"                          
##  [99] "Italy"                            "Jamaica"                         
## [101] "Japan"                            "Jersey"                          
## [103] "Jordan"                           "Kazakhstan"                      
## [105] "Kenya"                            "Kosovo"                          
## [107] "Kuwait"                           "Kyrgyzstan"                      
## [109] "Laos"                             "Latvia"                          
## [111] "Lebanon"                          "Lesotho"                         
## [113] "Liberia"                          "Libya"                           
## [115] "Liechtenstein"                    "Lithuania"                       
## [117] "Luxembourg"                       "Macedonia"                       
## [119] "Madagascar"                       "Malawi"                          
## [121] "Malaysia"                         "Maldives"                        
## [123] "Mali"                             "Malta"                           
## [125] "Mauritania"                       "Mauritius"                       
## [127] "Mexico"                           "Moldova"                         
## [129] "Monaco"                           "Mongolia"                        
## [131] "Montenegro"                       "Montserrat"                      
## [133] "Morocco"                          "Mozambique"                      
## [135] "Myanmar"                          "Namibia"                         
## [137] "Nepal"                            "Netherlands"                     
## [139] "New Caledonia"                    "New Zealand"                     
## [141] "Nicaragua"                        "Niger"                           
## [143] "Nigeria"                          "Northern Mariana Islands"        
## [145] "Norway"                           "Oman"                            
## [147] "Pakistan"                         "Palestine"                       
## [149] "Panama"                           "Papua New Guinea"                
## [151] "Paraguay"                         "Peru"                            
## [153] "Philippines"                      "Poland"                          
## [155] "Portugal"                         "Puerto Rico"                     
## [157] "Qatar"                            "Romania"                         
## [159] "Russia"                           "Rwanda"                          
## [161] "Saint Kitts and Nevis"            "Saint Lucia"                     
## [163] "Saint Vincent and the Grenadines" "San Marino"                      
## [165] "Sao Tome and Principe"            "Saudi Arabia"                    
## [167] "Senegal"                          "Serbia"                          
## [169] "Seychelles"                       "Sierra Leone"                    
## [171] "Singapore"                        "Sint Maarten (Dutch part)"       
## [173] "Slovakia"                         "Slovenia"                        
## [175] "Somalia"                          "South Africa"                    
## [177] "South Korea"                      "South Sudan"                     
## [179] "Spain"                            "Sri Lanka"                       
## [181] "Sudan"                            "Suriname"                        
## [183] "Swaziland"                        "Sweden"                          
## [185] "Switzerland"                      "Syria"                           
## [187] "Taiwan"                           "Tajikistan"                      
## [189] "Tanzania"                         "Thailand"                        
## [191] "Timor"                            "Togo"                            
## [193] "Trinidad and Tobago"              "Tunisia"                         
## [195] "Turkey"                           "Turks and Caicos Islands"        
## [197] "Uganda"                           "Ukraine"                         
## [199] "United Arab Emirates"             "United Kingdom"                  
## [201] "United States"                    "United States Virgin Islands"    
## [203] "Uruguay"                          "Uzbekistan"                      
## [205] "Vatican"                          "Venezuela"                       
## [207] "Vietnam"                          "Western Sahara"                  
## [209] "World"                            "Yemen"                           
## [211] "Zambia"                           "Zimbabwe"

List of continents of the data set.

levels(covid.df$continent)
## [1] ""              "Africa"        "Asia"          "Europe"       
## [5] "North America" "Oceania"       "South America"

Máximum number of days that the data set comprises.

maxdays <- max(table(covid.df$location))
print(paste("Maximum reported days:",maxdays))
## [1] "Maximum reported days: 235"

Countries reporting data of the maximum days of the data set.

names(table(covid.df$location)[table(covid.df$location)==maxdays])
##  [1] "Afghanistan"          "Algeria"              "Armenia"             
##  [4] "Australia"            "Austria"              "Azerbaijan"          
##  [7] "Bahrain"              "Belarus"              "Belgium"             
## [10] "Brazil"               "Cambodia"             "Canada"              
## [13] "China"                "Croatia"              "Czech Republic"      
## [16] "Denmark"              "Dominican Republic"   "Ecuador"             
## [19] "Egypt"                "Estonia"              "Finland"             
## [22] "France"               "Georgia"              "Germany"             
## [25] "Greece"               "Iceland"              "India"               
## [28] "Indonesia"            "International"        "Iran"                
## [31] "Iraq"                 "Ireland"              "Israel"              
## [34] "Italy"                "Japan"                "Kuwait"              
## [37] "Lebanon"              "Lithuania"            "Luxembourg"          
## [40] "Macedonia"            "Malaysia"             "Mexico"              
## [43] "Monaco"               "Nepal"                "Netherlands"         
## [46] "New Zealand"          "Nigeria"              "Norway"              
## [49] "Oman"                 "Pakistan"             "Philippines"         
## [52] "Qatar"                "Romania"              "Russia"              
## [55] "San Marino"           "Singapore"            "South Korea"         
## [58] "Sri Lanka"            "Sweden"               "Switzerland"         
## [61] "Taiwan"               "Thailand"             "United Arab Emirates"
## [64] "United Kingdom"       "United States"        "Vietnam"             
## [67] "World"

Start and end dates of the data set.

startdate <- min(covid.df$date)
enddate <- max(covid.df$date)
todayformatted <- format(Sys.Date(), "%A, %B %d, %Y")
todayformatted <- paste(toupper(substr(todayformatted, 1, 1)), 
                        substr(todayformatted, 2, nchar(todayformatted)), 
                        sep="")
print(paste("Starts:", format(startdate, "%A, %B, %d of %Y"), 
            "Ends:",  format(enddate, "%A, %B, %d of %Y")))
## [1] "Starts: Tuesday, December, 31 of 2019 Ends: Friday, August, 21 of 2020"

Rankings of countries with the variables of totals

From the original dataframe, only the rows where date is equal to the maximum date (or compilation date) are obtained. Using this filter only the updated totals or accumulated values of each country are obtained. Additionally, countries with less than a million inhabitants are discarded. Some countries with small population have very high statistics per million of inhabitants.

The rankings are created with the following variables:

# Get one row of each country with the updated totals
covid.total.df <- covid.df[covid.df$date== enddate,]
# Filter countries with less than 1 millón population
covid.total.df <- covid.total.df[covid.total.df$population >= 1000000,]
covid.total.df$location <- droplevels.factor(covid.total.df$location)
print(names(covid.total.df)[c(5,7,9,11)])
## [1] "total_cases"             "new_cases_smoothed"     
## [3] "new_deaths"              "total_cases_per_million"

After discarding the countries that have less than a million inhabitants, the data set now has: 156 countries.

For each of the variables with total values, the 20 countries with the highest and lowest values are obtained. If México is not included in the top or bottom 20 it is added to the list indicating its ranking position. For each variable to be analyzed, the following is done in R:

  1. The data is ordered by the corresponding variable to be analyzed and stored in a new dataframe.
  2. In the new dataframe the information of the 20 countries with the highest values and the 20 countries with the lowest values are kept, the rest is discarded (row filtering).
  3. In the new dataframe only variables of interest are kept, the rest is discarded (column filtering). An additional column is created with the numeric ranking of each country.
  4. The data of the rankings is displayed as tables and bar graphs.

Ranking of countries by total cases of COVID-19 with column total_cases

# Data is ordered according to the total_cases column
ranking.total_cases <- covid.total.df[order(-covid.total.df$total_cases),]
# The row corresponding to World is removed
ranking.total_cases <- ranking.total_cases[ranking.total_cases$location != "World", ]
# A new column indicating the positionin the rank is added
ranking.total_cases$position <- 1:nrow(ranking.total_cases)
# Only columns of interest are kept
columnfilter <- c("position", "location", "total_cases")
bottom20.total_cases <- tail(ranking.total_cases[, columnfilter],20)
top20.total_cases <- head(ranking.total_cases[, columnfilter],20)
rm(ranking.total_cases)
rownames(top20.total_cases) <- c()
rownames(bottom20.total_cases) <- c()

mexrow <- which(top20.total_cases$location=='Mexico')
top20.total_cases$total_cases_formated <- formatC(top20.total_cases$total_cases, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_cases$total_cases_formated <- formatC(bottom20.total_cases$total_cases, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases
Position Country Total cases
1 United States 5,573,847
2 Brazil 3,501,975
3 India 2,905,823
4 Russia 942,106
5 South Africa 599,940
6 Peru 567,059
7 Mexico 543,806
8 Colombia 513,719
9 Chile 391,849
10 Iran 352,558
11 United Kingdom 322,280
12 Argentina 312,646
13 Saudi Arabia 303,973
14 Pakistan 291,588
15 Bangladesh 287,959
16 Italy 256,118
17 Turkey 254,520
18 France 229,814
19 Germany 228,621
20 Iraq 192,797
Countries with least cases
Position Country Total cases
137 Jamaica 1,290
138 Liberia 1,284
139 Togo 1,212
140 Niger 1,169
141 Vietnam 1,007
142 Lesotho 996
143 Chad 972
144 Trinidad and Tobago 767
145 Tanzania 509
146 Taiwan 486
147 Burundi 422
148 Myanmar 409
149 Papua New Guinea 361
150 Mauritius 346
151 Eritrea 304
152 Mongolia 298
153 Cambodia 273
154 Timor 26
155 Laos 20
156 NA NA

Top 20 graph

ggplot(data=top20.total_cases, aes(x=reorder(paste(position, location),total_cases), 
                                   y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=total_cases_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 countries with most cases of  COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 with column total_cases - American countries

This ranking is done with American countries only. The data is obtained by filtering the rows where the continent variable is either “North America” or “South America”.

ranking.total_cases.america <- covid.total.df[covid.total.df$continent %in% 
                                                c("North America", "South America") ,]
ranking.total_cases.america <- ranking.total_cases.america[order(-ranking.total_cases.america$total_cases),]
ranking.total_cases.america$position <- 1:nrow(ranking.total_cases.america)
bottom20.total_cases.america <- tail(ranking.total_cases.america[, c("position", "location", "total_cases")],5)
top20.total_cases.america <- head(ranking.total_cases.america[, c("position", "location", "total_cases")],20)
rownames(bottom20.total_cases.america) <- c()
rownames(top20.total_cases.america) <- c()
mexrow <- which(ranking.total_cases.america$location=='Mexico')
rm(ranking.total_cases.america)
top20.total_cases.america$total_cases_formated <- formatC(top20.total_cases.america$total_cases,
                                                          format="f", big.mark=",", digits=0)
bottom20.total_cases.america$total_cases_formated <- formatC(bottom20.total_cases.america$total_cases, 
                                                             format="f", big.mark=",", digits=0)

Table

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases.america[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries of the American Continent with most cases") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
Countries of the American Continent with most cases
Position Country Total cases
1 United States 5,573,847
2 Brazil 3,501,975
3 Peru 567,059
4 Mexico 543,806
5 Colombia 513,719
6 Chile 391,849
7 Argentina 312,646
8 Canada 123,873
9 Bolivia 106,065
10 Ecuador 105,508
11 Dominican Republic 89,010
12 Panama 83,855
13 Guatemala 65,983
14 Honduras 52,819
15 Venezuela 37,567
16 Costa Rica 31,075
17 Puerto Rico 28,143
18 El Salvador 23,964
19 Paraguay 11,817
20 Haiti 7,997

Top 20 graph

ggplot(data=top20.total_cases.america, aes(x=reorder(paste(position, location),total_cases),
                                           y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=format(total_cases, big.mark=","),
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of American countries with most cases of COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 250000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "250k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 per million inhabitants with column total_cases_per_million

ranking.total_cases_per_million <- covid.total.df[order(-covid.total.df$total_cases_per_million),]
ranking.total_cases_per_million$position <- 1:nrow(ranking.total_cases_per_million)
columnfilter <- c("position", "location", "total_cases_per_million")
mexico.total_cases_per_million <- ranking.total_cases_per_million[
                                  ranking.total_cases_per_million$location == "Mexico", ]
bottom20.total_cases_per_million <- tail(ranking.total_cases_per_million[, columnfilter],20)
top20.total_cases_per_million <- head(ranking.total_cases_per_million[, columnfilter],20)
mexico.total_cases_per_million <- mexico.total_cases_per_million[, columnfilter]
rm(ranking.total_cases_per_million)
rownames(top20.total_cases_per_million) <- c()
rownames(bottom20.total_cases_per_million) <- c()
rownames(mexico.total_cases_per_million) <- c()

top20.total_cases_per_million <- rbind(top20.total_cases_per_million, mexico.total_cases_per_million)
mexrow <- which(top20.total_cases_per_million$location=='Mexico')

top20.total_cases_per_million$total_cases_per_million_formated <- formatC(
  top20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
bottom20.total_cases_per_million$total_cases_per_million_formated<- formatC(
  bottom20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)

Tables

tablecolnames <- c("Position", "country", "Cases pmi")
kable(top20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases per million inhabitants (pmi)
Position country Cases pmi
1 Qatar 40,583.67
2 Bahrain 28,179.64
3 Chile 20,498.26
4 Panama 19,434.42
5 Kuwait 18,444.17
6 Peru 17,198.28
7 United States 16,839.28
8 Brazil 16,475.28
9 Oman 16,403.99
10 Armenia 14,281.36
11 Israel 11,506.96
12 South Africa 10,115.55
13 Colombia 10,096.11
14 Puerto Rico 9,837.32
15 Singapore 9,589.01
16 Bolivia 9,086.33
17 Saudi Arabia 8,731.38
18 Sweden 8,496.65
19 Dominican Republic 8,205.27
20 Moldova 7,917.03
42 Mexico 4,217.75
NA NA NA
Countries with least cases per million inhabitants (pmi)
Position country Cases pmi
138 Mongolia 90.90
139 Eritrea 85.72
140 Yemen 63.67
141 China 62.23
142 Burkina Faso 62.05
143 Angola 61.31
144 Chad 59.17
145 Thailand 48.57
146 Niger 48.29
147 Papua New Guinea 40.35
148 Uganda 38.26
149 Burundi 35.49
150 Taiwan 20.41
151 Timor 19.72
152 Cambodia 16.33
153 Vietnam 10.35
154 Tanzania 8.52
155 Myanmar 7.52
156 Laos 2.75
157 NA NA

Top 20 graph

ggplot(data=top20.total_cases_per_million, 
       aes(x=reorder(paste(position, location),total_cases_per_million), 
           y=total_cases_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_cases_per_million)+1250, 
                label=total_cases_per_million_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with most cases of COVID-19 per million inhabitants (+ México)",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning: Removed 22 rows containing missing values (geom_text).

Ranking of countries by total deaths from COVID-19 with the variable total_deaths

ranking.total_deaths <- covid.total.df[order(-covid.total.df$total_deaths),]
ranking.total_deaths <- ranking.total_deaths[ranking.total_deaths$location != "World", ]
ranking.total_deaths$position <- 1:nrow(ranking.total_deaths)
columnfilter <- c("position", "location", "total_deaths")
bottom20.total_deaths <- tail(ranking.total_deaths[, columnfilter],20)
top20.total_deaths <- head(ranking.total_deaths[, columnfilter],20)
mexrow <- which(ranking.total_deaths$location=='Mexico')
rm(ranking.total_deaths)
rownames(top20.total_deaths) <- c()
rownames(bottom20.total_deaths) <- c()

top20.total_deaths$total_deaths_formated <- formatC(top20.total_deaths$total_deaths, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_deaths$total_deaths_formated <- formatC(bottom20.total_deaths$total_deaths, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total deaths")
kable(top20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19
Position Country Total deaths
1 United States 174,255
2 Brazil 112,304
3 Mexico 59,106
4 India 54,849
5 United Kingdom 41,403
6 Italy 35,418
7 France 30,480
8 Peru 27,034
9 Iran 20,264
10 Colombia 16,183
11 Russia 16,099
12 South Africa 12,618
13 Chile 10,671
14 Belgium 9,976
15 Germany 9,253
16 Canada 9,054
17 Indonesia 6,418
18 Argentina 6,406
19 Pakistan 6,219
20 Iraq 6,208
Countries with least deaths from COVID-19
Position Country Total deaths
137 Mozambique 20
138 Uganda 19
139 Georgia 17
140 Jamaica 15
141 Trinidad and Tobago 12
142 Jordan 11
143 Sri Lanka 11
144 Rwanda 11
145 Mauritius 10
146 Taiwan 7
147 Myanmar 6
148 Papua New Guinea 4
149 Botswana 3
150 Burundi 1
151 Eritrea 0
152 Cambodia 0
153 Laos 0
154 Mongolia 0
155 Timor 0
156 NA NA

Top 20 graph

ggplot(data=top20.total_deaths, 
       aes(x=reorder(paste(position, location),total_deaths), 
                                   y=total_deaths, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total deaths from COVID-19") +
  geom_text(aes(y=max(total_deaths)+7500, 
                label=total_deaths_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with total deaths from COVID-19",
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(10000, 25000, 50000, 75000, 100000, 150000, 170000), 
                     label=c("10k", "25k", "50k", "75k", "100k", "150k", "170k"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 per million inhabitants using the variable total_deaths_per_million

ranking.total_deaths_per_million <- covid.total.df[order(-covid.total.df$total_deaths_per_million),]
ranking.total_deaths_per_million$position <- 1:nrow(ranking.total_deaths_per_million)
columnfilter <- c("position", "location", "total_deaths_per_million")
bottom20.total_deaths_per_million <- tail(ranking.total_deaths_per_million[, columnfilter],20)
top20.total_deaths_per_million <- head(ranking.total_deaths_per_million[, columnfilter],20)
mexrow <- which(ranking.total_deaths_per_million$location=='Mexico')
rm(ranking.total_deaths_per_million)
rownames(top20.total_deaths_per_million) <- c()
rownames(bottom20.total_deaths_per_million) <- c()

top20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  top20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
bottom20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  bottom20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)

Table

tablecolnames <- c("Position", "Country", "Deaths pmi")

kable(top20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with most deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with least deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
1 Belgium 860.77
2 Peru 819.91
3 United Kingdom 609.89
4 Italy 585.79
5 Sweden 574.79
6 Chile 558.22
7 Brazil 528.34
8 United States 526.45
9 France 466.96
10 Mexico 458.43
11 Panama 427.37
12 Bolivia 368.80
13 Netherlands 360.79
14 Ireland 359.68
15 Ecuador 351.41
16 Colombia 318.04
17 Armenia 282.12
18 Macedonia 263.99
19 Iran 241.26
20 Canada 239.89
Countries with least deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
138 Burkina Faso 2.63
139 Botswana 1.28
140 Jordan 1.08
141 Rwanda 0.85
142 Thailand 0.83
143 Mozambique 0.64
144 Sri Lanka 0.51
145 Papua New Guinea 0.45
146 Uganda 0.41
147 Tanzania 0.35
148 Taiwan 0.29
149 Vietnam 0.26
150 Myanmar 0.11
151 Burundi 0.08
152 Eritrea 0.00
153 Cambodia 0.00
154 Laos 0.00
155 Mongolia 0.00
156 Timor 0.00
157 NA NA

Top 20 graph

ggplot(data=top20.total_deaths_per_million, 
       aes(x=reorder(paste(position, location),total_deaths_per_million), 
           y=total_deaths_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Deaths from COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_deaths_per_million)+25, 
                label=total_deaths_per_million_formated,
                fontface="bold"),
            color="black")+
  labs(title="Top 20 countries by deaths from COVID-19 per million inhabitants",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Cumulative COVID-19 Cases and Deaths

For the following graphs, the variables total_cases and total_deaths are used.

Curves of total cases of the five countries with most cases (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Friday, August 21, 2020.

Functions to build the line graphs.

##Line plot of new_total_cases with date breaks by month
plot.trend.total_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.total_cases.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Países")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative cases of COVID-19"
plot.trend.total_cases.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of cases of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with most cases are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_cases.others <- as.character(top20.total_cases$location)[3:7]
plot.trend.total_cases.month(startdate.tmp, enddate, total_cases.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-21, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks MX, PE y SA

total_cases.others <- as.character(top20.total_cases$location)[3:7]
total_cases.others <- total_cases.others[c(-1,-2)]
countrieslabels <- paste(total_cases.others, collapse = ', ')
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
title.tmp <- paste(title.tmp, "of", countrieslabels)
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

Curves of daily deaths of the five countries with most deaths from COVID-19 (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Friday, August 21, 2020.

Functions to build the line graphs.

# Generate total_deaths line plot with month breaks
plot.trend.total_deaths.month <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
# Generate total_deaths line plot with month and day breaks
plot.trend.total_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

top5.total_deaths <-  head(as.character(top20.total_deaths$location),5)
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative deaths from COVID-19"
plot.trend.total_deaths.month(startdate, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of deaths of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with the most total deaths are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_deaths.others <- as.character(top20.total_deaths$location)[3:7]
plot.trend.total_deaths.month(startdate.tmp, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks IN, IR, MX, PE, RU

The acumulative curves of France, Italy, and the United Kindgdom are almost flat, the number of deaths from COVID-19 has stagnated. One possible reason is that the pandemic started first in Europe and the number of recently reported deaths is zero or very low. The curves for Mexico and India continues to rise. The countries of France, Italy, and the United Kingdom are discarded from the data. Other countries are added where it seems that the number of deaths from COVID-19 is still increasing. The graph is generated with the accumulated death curves of the last 3 weeks of India, Iran, Mexico, Peru and Russia:

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-2 Weeks IN, IR, MX, PE, RU

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-14, enddate, total_deaths.others, title.tmp, sublabel.tmp)

Comparison of COVID-19 cases and deaths curves in Mexico

Daily cases and deaths

covid.mexico <- covid.df[covid.df$location=='Mexico',]
covid.mexico <- covid.mexico[covid.mexico$date > "2020-04-01",]
covid.mexico.new <- covid.mexico[,c("date", "new_cases", "new_deaths")]
covid.mexico.new.lf <-melt(covid.mexico.new, id.vars = c("date"))
ggplot(data=covid.mexico.new.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Daily cases and deaths last month

covid.mexico.new.lastmonth <- covid.mexico.new[covid.mexico.new$date >= enddate-30, ]
covid.mexico.new.lastmonth.lf <- melt(covid.mexico.new.lastmonth, id.vars = c("date"))
ggplot(data=covid.mexico.new.lastmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths

covid.mexico.total <- covid.mexico[,c("date", "total_cases", "total_deaths")]
covid.mexico.total.lf <-melt(covid.mexico.total, id.vars = c("date"))
ggplot(data=covid.mexico.total.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths last month

covid.mexico.total.lasttmonth <- covid.mexico.total[covid.mexico.total$date >= enddate-30, ]
covid.mexico.total.lasttmonth.lf <- melt(covid.mexico.total.lasttmonth, id.vars = c("date"))
ggplot(data=covid.mexico.total.lasttmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Fecha")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

COVID-19 cases and deaths in México per week

covid.mexico <- covid.df[covid.df$location=='Mexico',]
week.agg <- covid.mexico %>%
  group_by(week = week(date)) %>%
  summarise(mean_new_cases = mean(new_cases),
            n = n(), 
            loCI_new_cases = ci(new_cases)[2], 
            hiCI_new_cases = ci(new_cases)[3], 
            mean_new_deaths = mean(new_deaths), 
            loCI_new_deaths = ci(new_deaths)[2], 
            hiCI_new_deaths = ci(new_deaths)[3], 
            total_new_cases = sum(new_cases), 
            total_new_deaths = sum(new_deaths))
# Weeks with zero mean new cases are deleted
week.agg <- week.agg[which(week.agg$mean_new_cases > 0),]
# Finding the date of the first week to be ploted
firstweek <- min(week.agg$week)
rowindex <- match(firstweek, week(covid.mexico[,"date"]))
firstweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(firstweek, rowindex)
# Finding the date of the last week to be ploted
lastweek <- max(week.agg$week)
rowindex <- match(lastweek, week(covid.mexico[,"date"]))
lastweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(lastweek, rowindex)
sublabel <- paste("Date range:", firstweek.date, "-", lastweek.date)

Mean cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_cases))+
  geom_bar(stat = 'identity', aes(fill = mean_new_cases)) +
  geom_errorbar(aes(ymin=loCI_new_cases, ymax=hiCI_new_cases), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean cases per week ± CI 95%")+
  labs(title="Mean cases per week of COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Mean deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = mean_new_deaths)) +
  geom_errorbar(aes(ymin=loCI_new_deaths, ymax=hiCI_new_deaths), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean deaths per week ± CI 95%")+
  labs(title="Mean deaths per week from COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_cases))+
  geom_bar(stat = 'identity', aes(fill = total_new_cases)) +
  xlab("Week")+
  ylab("Total cases per week")+
  labs(title="Total COVID-19 cases per week in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = total_new_deaths)) +
  xlab("Week")+
  ylab("Total deaths per week")+
  labs(title="Total COVID-19 deaths per week in México",
       subtitle=sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))
## [1] "es_MX.UTF-8"