Description

This is an exploratory data analysis for educational purposes. The analysis is carried out with data related to COVID-19 reported by various countries. The data is obtained from the Website: https://ourworldindata.org/coronavirus-source-data. The site’s data set is constantly updated. The results of the analysis changes according to the day the code is executed.

Data reading

The data set is loaded directly from the Website to obtain information updated according to the execution day of the code.

covid.df <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

Required packages are loaded.

library(ggplot2)
library(knitr)
library(reshape2)
library(kableExtra)
#library(dplyr)
library(tidyverse)
library(lubridate)
library(gmodels)
plotscaption <- "oscarcastrolopez.github.io"

Display of the dataframe structure with its variables and their type.

str(covid.df)
## 'data.frame':    39482 obs. of  40 variables:
##  $ iso_code                       : Factor w/ 212 levels "","ABW","AFG",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ continent                      : Factor w/ 7 levels "","Africa","Asia",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ location                       : Factor w/ 212 levels "Afghanistan",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ date                           : Factor w/ 239 levels "2019-12-31","2020-01-01",..: 74 80 81 82 83 84 85 86 87 88 ...
##  $ total_cases                    : num  2 NA 4 NA NA NA 12 17 19 28 ...
##  $ new_cases                      : num  2 NA 2 NA NA NA 8 5 2 9 ...
##  $ new_cases_smoothed             : num  NA 0.286 0.286 0.286 0.286 ...
##  $ total_deaths                   : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths                     : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_smoothed            : num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ total_cases_per_million        : num  18.7 NA 37.5 NA NA ...
##  $ new_cases_per_million          : num  18.7 NA 18.7 NA NA ...
##  $ new_cases_smoothed_per_million : num  NA 2.68 2.68 2.68 2.68 ...
##  $ total_deaths_per_million       : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_per_million         : num  0 NA 0 NA NA NA 0 0 0 0 ...
##  $ new_deaths_smoothed_per_million: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ new_tests                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests_per_thousand       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_per_thousand         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed_per_thousand: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_per_case                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ positive_rate                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_units                    : Factor w/ 7 levels "","people tested",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stringency_index               : num  0 33.3 33.3 44.4 44.4 ...
##  $ population                     : num  106766 106766 106766 106766 106766 ...
##  $ population_density             : num  585 585 585 585 585 ...
##  $ median_age                     : num  41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 41.2 ...
##  $ aged_65_older                  : num  13.1 13.1 13.1 13.1 13.1 ...
##  $ aged_70_older                  : num  7.45 7.45 7.45 7.45 7.45 ...
##  $ gdp_per_capita                 : num  35974 35974 35974 35974 35974 ...
##  $ extreme_poverty                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ cardiovasc_death_rate          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ diabetes_prevalence            : num  11.6 11.6 11.6 11.6 11.6 ...
##  $ female_smokers                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ male_smokers                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ handwashing_facilities         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hospital_beds_per_thousand     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ life_expectancy                : num  76.3 76.3 76.3 76.3 76.3 ...

A detailed description of each variable of the data set can be found on the following Website: https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-codebook.csv.

The variable date is class factor is converted to class date.

covid.df$date <- as.Date(covid.df$date, format("%Y-%m-%d"))

List of countries of the data set.

levels(covid.df$location)
##   [1] "Afghanistan"                      "Albania"                         
##   [3] "Algeria"                          "Andorra"                         
##   [5] "Angola"                           "Anguilla"                        
##   [7] "Antigua and Barbuda"              "Argentina"                       
##   [9] "Armenia"                          "Aruba"                           
##  [11] "Australia"                        "Austria"                         
##  [13] "Azerbaijan"                       "Bahamas"                         
##  [15] "Bahrain"                          "Bangladesh"                      
##  [17] "Barbados"                         "Belarus"                         
##  [19] "Belgium"                          "Belize"                          
##  [21] "Benin"                            "Bermuda"                         
##  [23] "Bhutan"                           "Bolivia"                         
##  [25] "Bonaire Sint Eustatius and Saba"  "Bosnia and Herzegovina"          
##  [27] "Botswana"                         "Brazil"                          
##  [29] "British Virgin Islands"           "Brunei"                          
##  [31] "Bulgaria"                         "Burkina Faso"                    
##  [33] "Burundi"                          "Cambodia"                        
##  [35] "Cameroon"                         "Canada"                          
##  [37] "Cape Verde"                       "Cayman Islands"                  
##  [39] "Central African Republic"         "Chad"                            
##  [41] "Chile"                            "China"                           
##  [43] "Colombia"                         "Comoros"                         
##  [45] "Congo"                            "Costa Rica"                      
##  [47] "Cote d'Ivoire"                    "Croatia"                         
##  [49] "Cuba"                             "Curacao"                         
##  [51] "Cyprus"                           "Czech Republic"                  
##  [53] "Democratic Republic of Congo"     "Denmark"                         
##  [55] "Djibouti"                         "Dominica"                        
##  [57] "Dominican Republic"               "Ecuador"                         
##  [59] "Egypt"                            "El Salvador"                     
##  [61] "Equatorial Guinea"                "Eritrea"                         
##  [63] "Estonia"                          "Ethiopia"                        
##  [65] "Faeroe Islands"                   "Falkland Islands"                
##  [67] "Fiji"                             "Finland"                         
##  [69] "France"                           "French Polynesia"                
##  [71] "Gabon"                            "Gambia"                          
##  [73] "Georgia"                          "Germany"                         
##  [75] "Ghana"                            "Gibraltar"                       
##  [77] "Greece"                           "Greenland"                       
##  [79] "Grenada"                          "Guam"                            
##  [81] "Guatemala"                        "Guernsey"                        
##  [83] "Guinea"                           "Guinea-Bissau"                   
##  [85] "Guyana"                           "Haiti"                           
##  [87] "Honduras"                         "Hong Kong"                       
##  [89] "Hungary"                          "Iceland"                         
##  [91] "India"                            "Indonesia"                       
##  [93] "International"                    "Iran"                            
##  [95] "Iraq"                             "Ireland"                         
##  [97] "Isle of Man"                      "Israel"                          
##  [99] "Italy"                            "Jamaica"                         
## [101] "Japan"                            "Jersey"                          
## [103] "Jordan"                           "Kazakhstan"                      
## [105] "Kenya"                            "Kosovo"                          
## [107] "Kuwait"                           "Kyrgyzstan"                      
## [109] "Laos"                             "Latvia"                          
## [111] "Lebanon"                          "Lesotho"                         
## [113] "Liberia"                          "Libya"                           
## [115] "Liechtenstein"                    "Lithuania"                       
## [117] "Luxembourg"                       "Macedonia"                       
## [119] "Madagascar"                       "Malawi"                          
## [121] "Malaysia"                         "Maldives"                        
## [123] "Mali"                             "Malta"                           
## [125] "Mauritania"                       "Mauritius"                       
## [127] "Mexico"                           "Moldova"                         
## [129] "Monaco"                           "Mongolia"                        
## [131] "Montenegro"                       "Montserrat"                      
## [133] "Morocco"                          "Mozambique"                      
## [135] "Myanmar"                          "Namibia"                         
## [137] "Nepal"                            "Netherlands"                     
## [139] "New Caledonia"                    "New Zealand"                     
## [141] "Nicaragua"                        "Niger"                           
## [143] "Nigeria"                          "Northern Mariana Islands"        
## [145] "Norway"                           "Oman"                            
## [147] "Pakistan"                         "Palestine"                       
## [149] "Panama"                           "Papua New Guinea"                
## [151] "Paraguay"                         "Peru"                            
## [153] "Philippines"                      "Poland"                          
## [155] "Portugal"                         "Puerto Rico"                     
## [157] "Qatar"                            "Romania"                         
## [159] "Russia"                           "Rwanda"                          
## [161] "Saint Kitts and Nevis"            "Saint Lucia"                     
## [163] "Saint Vincent and the Grenadines" "San Marino"                      
## [165] "Sao Tome and Principe"            "Saudi Arabia"                    
## [167] "Senegal"                          "Serbia"                          
## [169] "Seychelles"                       "Sierra Leone"                    
## [171] "Singapore"                        "Sint Maarten (Dutch part)"       
## [173] "Slovakia"                         "Slovenia"                        
## [175] "Somalia"                          "South Africa"                    
## [177] "South Korea"                      "South Sudan"                     
## [179] "Spain"                            "Sri Lanka"                       
## [181] "Sudan"                            "Suriname"                        
## [183] "Swaziland"                        "Sweden"                          
## [185] "Switzerland"                      "Syria"                           
## [187] "Taiwan"                           "Tajikistan"                      
## [189] "Tanzania"                         "Thailand"                        
## [191] "Timor"                            "Togo"                            
## [193] "Trinidad and Tobago"              "Tunisia"                         
## [195] "Turkey"                           "Turks and Caicos Islands"        
## [197] "Uganda"                           "Ukraine"                         
## [199] "United Arab Emirates"             "United Kingdom"                  
## [201] "United States"                    "United States Virgin Islands"    
## [203] "Uruguay"                          "Uzbekistan"                      
## [205] "Vatican"                          "Venezuela"                       
## [207] "Vietnam"                          "Western Sahara"                  
## [209] "World"                            "Yemen"                           
## [211] "Zambia"                           "Zimbabwe"

List of continents of the data set.

levels(covid.df$continent)
## [1] ""              "Africa"        "Asia"          "Europe"       
## [5] "North America" "Oceania"       "South America"

Máximum number of days that the data set comprises.

maxdays <- max(table(covid.df$location))
print(paste("Maximum reported days:",maxdays))
## [1] "Maximum reported days: 239"

Countries reporting data of the maximum days of the data set.

names(table(covid.df$location)[table(covid.df$location)==maxdays])
##  [1] "Afghanistan"          "Algeria"              "Armenia"             
##  [4] "Australia"            "Austria"              "Azerbaijan"          
##  [7] "Bahrain"              "Belarus"              "Belgium"             
## [10] "Brazil"               "Cambodia"             "Canada"              
## [13] "China"                "Croatia"              "Czech Republic"      
## [16] "Denmark"              "Dominican Republic"   "Ecuador"             
## [19] "Egypt"                "Estonia"              "Finland"             
## [22] "France"               "Georgia"              "Germany"             
## [25] "Greece"               "Iceland"              "India"               
## [28] "Indonesia"            "International"        "Iran"                
## [31] "Iraq"                 "Ireland"              "Israel"              
## [34] "Italy"                "Japan"                "Kuwait"              
## [37] "Lebanon"              "Lithuania"            "Luxembourg"          
## [40] "Macedonia"            "Malaysia"             "Mexico"              
## [43] "Monaco"               "Nepal"                "Netherlands"         
## [46] "New Zealand"          "Nigeria"              "Norway"              
## [49] "Oman"                 "Pakistan"             "Philippines"         
## [52] "Qatar"                "Romania"              "Russia"              
## [55] "San Marino"           "Singapore"            "South Korea"         
## [58] "Sri Lanka"            "Sweden"               "Switzerland"         
## [61] "Taiwan"               "Thailand"             "United Arab Emirates"
## [64] "United Kingdom"       "United States"        "Vietnam"             
## [67] "World"

Start and end dates of the data set.

startdate <- min(covid.df$date)
enddate <- max(covid.df$date)
todayformatted <- format(Sys.Date(), "%A, %B %d, %Y")
todayformatted <- paste(toupper(substr(todayformatted, 1, 1)), 
                        substr(todayformatted, 2, nchar(todayformatted)), 
                        sep="")
print(paste("Starts:", format(startdate, "%A, %B, %d of %Y"), 
            "Ends:",  format(enddate, "%A, %B, %d of %Y")))
## [1] "Starts: Tuesday, December, 31 of 2019 Ends: Tuesday, August, 25 of 2020"

Rankings of countries with the variables of totals

From the original dataframe, only the rows where date is equal to the maximum date (or compilation date) are obtained. Using this filter only the updated totals or accumulated values of each country are obtained. Additionally, countries with less than a million inhabitants are discarded. Some countries with small population have very high statistics per million of inhabitants.

The rankings are created with the following variables:

# Get one row of each country with the updated totals
covid.total.df <- covid.df[covid.df$date== enddate,]
# Filter countries with less than 1 millón population
covid.total.df <- covid.total.df[covid.total.df$population >= 1000000,]
covid.total.df$location <- droplevels.factor(covid.total.df$location)
print(names(covid.total.df)[c(5,7,9,11)])
## [1] "total_cases"             "new_cases_smoothed"     
## [3] "new_deaths"              "total_cases_per_million"

After discarding the countries that have less than a million inhabitants, the data set now has: 156 countries.

For each of the variables with total values, the 20 countries with the highest and lowest values are obtained. If México is not included in the top or bottom 20 it is added to the list indicating its ranking position. For each variable to be analyzed, the following is done in R:

  1. The data is ordered by the corresponding variable to be analyzed and stored in a new dataframe.
  2. In the new dataframe the information of the 20 countries with the highest values and the 20 countries with the lowest values are kept, the rest is discarded (row filtering).
  3. In the new dataframe only variables of interest are kept, the rest is discarded (column filtering). An additional column is created with the numeric ranking of each country.
  4. The data of the rankings is displayed as tables and bar graphs.

Ranking of countries by total cases of COVID-19 with column total_cases

# Data is ordered according to the total_cases column
ranking.total_cases <- covid.total.df[order(-covid.total.df$total_cases),]
# The row corresponding to World is removed
ranking.total_cases <- ranking.total_cases[ranking.total_cases$location != "World", ]
# A new column indicating the positionin the rank is added
ranking.total_cases$position <- 1:nrow(ranking.total_cases)
# Only columns of interest are kept
columnfilter <- c("position", "location", "total_cases")
bottom20.total_cases <- tail(ranking.total_cases[, columnfilter],20)
top20.total_cases <- head(ranking.total_cases[, columnfilter],20)
rm(ranking.total_cases)
rownames(top20.total_cases) <- c()
rownames(bottom20.total_cases) <- c()

mexrow <- which(top20.total_cases$location=='Mexico')
top20.total_cases$total_cases_formated <- formatC(top20.total_cases$total_cases, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_cases$total_cases_formated <- formatC(bottom20.total_cases$total_cases, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases
Position Country Total cases
1 United States 5,740,909
2 Brazil 3,622,861
3 India 3,167,323
4 Russia 961,493
5 South Africa 611,450
6 Peru 600,438
7 Mexico 563,705
8 Colombia 551,696
9 Chile 399,568
10 Iran 358,905
11 Argentina 350,854
12 United Kingdom 326,614
13 Saudi Arabia 308,654
14 Bangladesh 297,083
15 Pakistan 293,711
16 Italy 260,298
17 Turkey 259,692
18 France 244,854
19 Germany 234,853
20 Iraq 207,985
Countries with least cases
Position Country Total cases
137 Latvia 1,337
138 Togo 1,295
139 Liberia 1,290
140 Niger 1,172
141 Trinidad and Tobago 1,099
142 Vietnam 1,022
143 Lesotho 1,015
144 Chad 987
145 Tanzania 509
146 Taiwan 487
147 Myanmar 474
148 Burundi 430
149 Papua New Guinea 401
150 Mauritius 346
151 Eritrea 306
152 Mongolia 298
153 Cambodia 273
154 Timor 26
155 Laos 22
156 NA NA

Top 20 graph

ggplot(data=top20.total_cases, aes(x=reorder(paste(position, location),total_cases), 
                                   y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=total_cases_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 countries with most cases of  COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 with column total_cases - American countries

This ranking is done with American countries only. The data is obtained by filtering the rows where the continent variable is either “North America” or “South America”.

ranking.total_cases.america <- covid.total.df[covid.total.df$continent %in% 
                                                c("North America", "South America") ,]
ranking.total_cases.america <- ranking.total_cases.america[order(-ranking.total_cases.america$total_cases),]
ranking.total_cases.america$position <- 1:nrow(ranking.total_cases.america)
bottom20.total_cases.america <- tail(ranking.total_cases.america[, c("position", "location", "total_cases")],5)
top20.total_cases.america <- head(ranking.total_cases.america[, c("position", "location", "total_cases")],20)
rownames(bottom20.total_cases.america) <- c()
rownames(top20.total_cases.america) <- c()
mexrow <- which(ranking.total_cases.america$location=='Mexico')
rm(ranking.total_cases.america)
top20.total_cases.america$total_cases_formated <- formatC(top20.total_cases.america$total_cases,
                                                          format="f", big.mark=",", digits=0)
bottom20.total_cases.america$total_cases_formated <- formatC(bottom20.total_cases.america$total_cases, 
                                                             format="f", big.mark=",", digits=0)

Table

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases.america[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries of the American Continent with most cases") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
Countries of the American Continent with most cases
Position Country Total cases
1 United States 5,740,909
2 Brazil 3,622,861
3 Peru 600,438
4 Mexico 563,705
5 Colombia 551,696
6 Chile 399,568
7 Argentina 350,854
8 Canada 125,647
9 Bolivia 110,148
10 Ecuador 108,289
11 Dominican Republic 91,608
12 Panama 87,485
13 Guatemala 68,533
14 Honduras 55,479
15 Venezuela 40,338
16 Costa Rica 34,463
17 Puerto Rico 30,618
18 El Salvador 24,811
19 Paraguay 13,602
20 Haiti 8,110

Top 20 graph

ggplot(data=top20.total_cases.america, aes(x=reorder(paste(position, location),total_cases),
                                           y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=format(total_cases, big.mark=","),
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of American countries with most cases of COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 250000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "250k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 per million inhabitants with column total_cases_per_million

ranking.total_cases_per_million <- covid.total.df[order(-covid.total.df$total_cases_per_million),]
ranking.total_cases_per_million$position <- 1:nrow(ranking.total_cases_per_million)
columnfilter <- c("position", "location", "total_cases_per_million")
mexico.total_cases_per_million <- ranking.total_cases_per_million[
                                  ranking.total_cases_per_million$location == "Mexico", ]
bottom20.total_cases_per_million <- tail(ranking.total_cases_per_million[, columnfilter],20)
top20.total_cases_per_million <- head(ranking.total_cases_per_million[, columnfilter],20)
mexico.total_cases_per_million <- mexico.total_cases_per_million[, columnfilter]
rm(ranking.total_cases_per_million)
rownames(top20.total_cases_per_million) <- c()
rownames(bottom20.total_cases_per_million) <- c()
rownames(mexico.total_cases_per_million) <- c()

top20.total_cases_per_million <- rbind(top20.total_cases_per_million, mexico.total_cases_per_million)
mexrow <- which(top20.total_cases_per_million$location=='Mexico')

top20.total_cases_per_million$total_cases_per_million_formated <- formatC(
  top20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
bottom20.total_cases_per_million$total_cases_per_million_formated<- formatC(
  bottom20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)

Tables

tablecolnames <- c("Position", "country", "Cases pmi")
kable(top20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases per million inhabitants (pmi)
Position country Cases pmi
1 Qatar 40,702.38
2 Bahrain 28,990.65
3 Chile 20,902.05
4 Panama 20,275.71
5 Kuwait 18,957.69
6 Peru 18,210.63
7 United States 17,344.00
8 Brazil 17,043.99
9 Oman 16,548.90
10 Armenia 14,452.11
11 Israel 12,138.24
12 Colombia 10,842.47
13 Puerto Rico 10,702.45
14 South Africa 10,309.62
15 Singapore 9,641.14
16 Bolivia 9,436.11
17 Saudi Arabia 8,865.83
18 Sweden 8,586.86
19 Dominican Republic 8,444.76
20 Moldova 8,385.80
41 Mexico 4,372.09
NA NA NA
Countries with least cases per million inhabitants (pmi)
Position country Cases pmi
138 Mongolia 90.90
139 Eritrea 86.28
140 Angola 67.61
141 Yemen 64.24
142 Burkina Faso 64.01
143 China 62.33
144 Chad 60.09
145 Uganda 51.64
146 Thailand 48.74
147 Niger 48.42
148 Papua New Guinea 44.82
149 Burundi 36.16
150 Taiwan 20.45
151 Timor 19.72
152 Cambodia 16.33
153 Vietnam 10.50
154 Myanmar 8.71
155 Tanzania 8.52
156 Laos 3.02
157 NA NA

Top 20 graph

ggplot(data=top20.total_cases_per_million, 
       aes(x=reorder(paste(position, location),total_cases_per_million), 
           y=total_cases_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_cases_per_million)+1250, 
                label=total_cases_per_million_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with most cases of COVID-19 per million inhabitants (+ México)",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning: Removed 22 rows containing missing values (geom_text).

Ranking of countries by total deaths from COVID-19 with the variable total_deaths

ranking.total_deaths <- covid.total.df[order(-covid.total.df$total_deaths),]
ranking.total_deaths <- ranking.total_deaths[ranking.total_deaths$location != "World", ]
ranking.total_deaths$position <- 1:nrow(ranking.total_deaths)
columnfilter <- c("position", "location", "total_deaths")
bottom20.total_deaths <- tail(ranking.total_deaths[, columnfilter],20)
top20.total_deaths <- head(ranking.total_deaths[, columnfilter],20)
mexrow <- which(ranking.total_deaths$location=='Mexico')
rm(ranking.total_deaths)
rownames(top20.total_deaths) <- c()
rownames(bottom20.total_deaths) <- c()

top20.total_deaths$total_deaths_formated <- formatC(top20.total_deaths$total_deaths, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_deaths$total_deaths_formated <- formatC(bottom20.total_deaths$total_deaths, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total deaths")
kable(top20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19
Position Country Total deaths
1 United States 177,279
2 Brazil 115,309
3 Mexico 60,800
4 India 58,390
5 United Kingdom 41,433
6 Italy 35,441
7 France 30,528
8 Peru 27,813
9 Iran 20,643
10 Colombia 17,612
11 Russia 16,448
12 South Africa 13,159
13 Chile 10,916
14 Belgium 9,996
15 Germany 9,277
16 Canada 9,083
17 Argentina 7,366
18 Indonesia 6,759
19 Iraq 6,519
20 Ecuador 6,322
Countries with least deaths from COVID-19
Position Country Total deaths
137 Mozambique 21
138 Tanzania 21
139 Georgia 18
140 Jamaica 16
141 Trinidad and Tobago 15
142 Jordan 14
143 Rwanda 14
144 Sri Lanka 12
145 Mauritius 10
146 Taiwan 7
147 Myanmar 6
148 Papua New Guinea 4
149 Botswana 3
150 Burundi 1
151 Eritrea 0
152 Cambodia 0
153 Laos 0
154 Mongolia 0
155 Timor 0
156 NA NA

Top 20 graph

ggplot(data=top20.total_deaths, 
       aes(x=reorder(paste(position, location),total_deaths), 
                                   y=total_deaths, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total deaths from COVID-19") +
  geom_text(aes(y=max(total_deaths)+7500, 
                label=total_deaths_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with total deaths from COVID-19",
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(10000, 25000, 50000, 75000, 100000, 150000, 170000), 
                     label=c("10k", "25k", "50k", "75k", "100k", "150k", "170k"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 per million inhabitants using the variable total_deaths_per_million

ranking.total_deaths_per_million <- covid.total.df[order(-covid.total.df$total_deaths_per_million),]
ranking.total_deaths_per_million$position <- 1:nrow(ranking.total_deaths_per_million)
columnfilter <- c("position", "location", "total_deaths_per_million")
bottom20.total_deaths_per_million <- tail(ranking.total_deaths_per_million[, columnfilter],20)
top20.total_deaths_per_million <- head(ranking.total_deaths_per_million[, columnfilter],20)
mexrow <- which(ranking.total_deaths_per_million$location=='Mexico')
rm(ranking.total_deaths_per_million)
rownames(top20.total_deaths_per_million) <- c()
rownames(bottom20.total_deaths_per_million) <- c()

top20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  top20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
bottom20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  bottom20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)

Table

tablecolnames <- c("Position", "Country", "Deaths pmi")

kable(top20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with most deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with least deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
1 Belgium 862.50
2 Peru 843.54
3 United Kingdom 610.33
4 Italy 586.17
5 Sweden 575.59
6 Chile 571.03
7 Brazil 542.48
8 United States 535.58
9 Mexico 471.56
10 France 467.69
11 Panama 441.74
12 Bolivia 392.19
13 Netherlands 361.43
14 Ireland 359.88
15 Ecuador 358.33
16 Colombia 346.13
17 Armenia 288.20
18 Macedonia 270.71
19 Iran 245.77
20 Kosovo 241.62
Countries with least deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
138 Burkina Faso 2.63
139 Jordan 1.37
140 Botswana 1.28
141 Rwanda 1.08
142 Thailand 0.83
143 Mozambique 0.67
144 Sri Lanka 0.56
145 Uganda 0.48
146 Papua New Guinea 0.45
147 Tanzania 0.35
148 Taiwan 0.29
149 Vietnam 0.28
150 Myanmar 0.11
151 Burundi 0.08
152 Eritrea 0.00
153 Cambodia 0.00
154 Laos 0.00
155 Mongolia 0.00
156 Timor 0.00
157 NA NA

Top 20 graph

ggplot(data=top20.total_deaths_per_million, 
       aes(x=reorder(paste(position, location),total_deaths_per_million), 
           y=total_deaths_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Deaths from COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_deaths_per_million)+25, 
                label=total_deaths_per_million_formated,
                fontface="bold"),
            color="black")+
  labs(title="Top 20 countries by deaths from COVID-19 per million inhabitants",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Cumulative COVID-19 Cases and Deaths

For the following graphs, the variables total_cases and total_deaths are used.

Curves of total cases of the five countries with most cases (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Tuesday, August 25, 2020.

Functions to build the line graphs.

##Line plot of new_total_cases with date breaks by month
plot.trend.total_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.total_cases.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Países")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative cases of COVID-19"
plot.trend.total_cases.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of cases of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with most cases are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_cases.others <- as.character(top20.total_cases$location)[3:7]
plot.trend.total_cases.month(startdate.tmp, enddate, total_cases.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-21, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks MX, PE y SA

total_cases.others <- as.character(top20.total_cases$location)[3:7]
total_cases.others <- total_cases.others[c(-1,-2)]
countrieslabels <- paste(total_cases.others, collapse = ', ')
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
title.tmp <- paste(title.tmp, "of", countrieslabels)
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

Curves of daily deaths of the five countries with most deaths from COVID-19 (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Tuesday, August 25, 2020.

Functions to build the line graphs.

# Generate total_deaths line plot with month breaks
plot.trend.total_deaths.month <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
# Generate total_deaths line plot with month and day breaks
plot.trend.total_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

top5.total_deaths <-  head(as.character(top20.total_deaths$location),5)
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative deaths from COVID-19"
plot.trend.total_deaths.month(startdate, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of deaths of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with the most total deaths are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_deaths.others <- as.character(top20.total_deaths$location)[3:7]
plot.trend.total_deaths.month(startdate.tmp, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks IN, IR, MX, PE, RU

The acumulative curves of France, Italy, and the United Kindgdom are almost flat, the number of deaths from COVID-19 has stagnated. One possible reason is that the pandemic started first in Europe and the number of recently reported deaths is zero or very low. The curves for Mexico and India continues to rise. The countries of France, Italy, and the United Kingdom are discarded from the data. Other countries are added where it seems that the number of deaths from COVID-19 is still increasing. The graph is generated with the accumulated death curves of the last 3 weeks of India, Iran, Mexico, Peru and Russia:

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-2 Weeks IN, IR, MX, PE, RU

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-14, enddate, total_deaths.others, title.tmp, sublabel.tmp)

Comparison of COVID-19 cases and deaths curves in Mexico

Daily cases and deaths

covid.mexico <- covid.df[covid.df$location=='Mexico',]
covid.mexico <- covid.mexico[covid.mexico$date > "2020-04-01",]
covid.mexico.new <- covid.mexico[,c("date", "new_cases", "new_deaths")]
covid.mexico.new.lf <-melt(covid.mexico.new, id.vars = c("date"))
ggplot(data=covid.mexico.new.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Daily cases and deaths last month

covid.mexico.new.lastmonth <- covid.mexico.new[covid.mexico.new$date >= enddate-30, ]
covid.mexico.new.lastmonth.lf <- melt(covid.mexico.new.lastmonth, id.vars = c("date"))
ggplot(data=covid.mexico.new.lastmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths

covid.mexico.total <- covid.mexico[,c("date", "total_cases", "total_deaths")]
covid.mexico.total.lf <-melt(covid.mexico.total, id.vars = c("date"))
ggplot(data=covid.mexico.total.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths last month

covid.mexico.total.lasttmonth <- covid.mexico.total[covid.mexico.total$date >= enddate-30, ]
covid.mexico.total.lasttmonth.lf <- melt(covid.mexico.total.lasttmonth, id.vars = c("date"))
ggplot(data=covid.mexico.total.lasttmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Fecha")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

COVID-19 cases and deaths in México per week

covid.mexico <- covid.df[covid.df$location=='Mexico',]
week.agg <- covid.mexico %>%
  group_by(week = week(date)) %>%
  summarise(mean_new_cases = mean(new_cases),
            n = n(), 
            loCI_new_cases = ci(new_cases)[2], 
            hiCI_new_cases = ci(new_cases)[3], 
            mean_new_deaths = mean(new_deaths), 
            loCI_new_deaths = ci(new_deaths)[2], 
            hiCI_new_deaths = ci(new_deaths)[3], 
            total_new_cases = sum(new_cases), 
            total_new_deaths = sum(new_deaths))
# Weeks with zero mean new cases are deleted
week.agg <- week.agg[which(week.agg$mean_new_cases > 0),]
# Finding the date of the first week to be ploted
firstweek <- min(week.agg$week)
rowindex <- match(firstweek, week(covid.mexico[,"date"]))
firstweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(firstweek, rowindex)
# Finding the date of the last week to be ploted
lastweek <- max(week.agg$week)
rowindex <- match(lastweek, week(covid.mexico[,"date"]))
lastweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(lastweek, rowindex)
sublabel <- paste("Date range:", firstweek.date, "-", lastweek.date)

Mean cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_cases))+
  geom_bar(stat = 'identity', aes(fill = mean_new_cases)) +
  geom_errorbar(aes(ymin=loCI_new_cases, ymax=hiCI_new_cases), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean cases per week ± CI 95%")+
  labs(title="Mean cases per week of COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Mean deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = mean_new_deaths)) +
  geom_errorbar(aes(ymin=loCI_new_deaths, ymax=hiCI_new_deaths), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean deaths per week ± CI 95%")+
  labs(title="Mean deaths per week from COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_cases))+
  geom_bar(stat = 'identity', aes(fill = total_new_cases)) +
  xlab("Week")+
  ylab("Total cases per week")+
  labs(title="Total COVID-19 cases per week in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = total_new_deaths)) +
  xlab("Week")+
  ylab("Total deaths per week")+
  labs(title="Total COVID-19 deaths per week in México",
       subtitle=sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))
## [1] "es_MX.UTF-8"