Introduction

Goal of this Visualization task is to create a visualization for the Drug and Medicine Exports data for different countries.

In this blog post, I’m trying to find the leading countries in Export across these 5 years.

Analysis

Cleaning up workspace and loading required libraries

rm(list = ls())
library(tidyverse) #Data Wrangling
library("httr")
library(readxl) #Data Ingestion
library(ggplot2) #Data Visualization

Obtaining Data

Reading and viewing the dataset

GET("https://query.data.world/s/utmlfljjzc2naoeielefxsf4fh5qkf", write_disk(tf <- tempfile(fileext = ".xlsx")))
drugs <- read_excel(tf)
drugs
## # A tibble: 1,100 x 3
##    Exporter                  Year `Exports (USD)`
##    <chr>                    <dbl>           <dbl>
##  1 World                     2013    326445385000
##  2 Germany                   2013     48493611000
##  3 Switzerland               2013     32337891000
##  4 Belgium                   2013     33329615000
##  5 France                    2013     27848920000
##  6 United States of America  2013     23098676000
##  7 United Kingdom            2013     20885936000
##  8 Ireland                   2013     18152573000
##  9 Italy                     2013     20898532000
## 10 Netherlands               2013     13480651000
## # … with 1,090 more rows

Summarizing and getting stats to better understand the dataset

drugs %>% 
  glimpse()
## Rows: 1,100
## Columns: 3
## $ Exporter        <chr> "World", "Germany", "Switzerland", "Belgium", "France…
## $ Year            <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,…
## $ `Exports (USD)` <dbl> 326445385000, 48493611000, 32337891000, 33329615000, …
drugs %>% 
  summary()
##    Exporter              Year      Exports (USD)      
##  Length:1100        Min.   :2013   Min.   :0.000e+00  
##  Class :character   1st Qu.:2014   1st Qu.:1.212e+05  
##  Mode  :character   Median :2015   Median :7.393e+06  
##                     Mean   :2015   Mean   :3.315e+09  
##                     3rd Qu.:2016   3rd Qu.:3.293e+08  
##                     Max.   :2017   Max.   :3.405e+11  
##                                    NA's   :266

Scrubbing data

Removing rows with NA for the purposes of this visualization

drugs = drugs %>% 
  filter(!is.na(`Exports (USD)`))

Let’s see overall which are the Countries with highest export over these 5 years.

drugs %>% 
  group_by(Exporter) %>% 
  summarise(TotalExport = sum(`Exports (USD)`)) %>% 
  arrange(desc(TotalExport))
## # A tibble: 220 x 2
##    Exporter                   TotalExport
##    <chr>                            <dbl>
##  1 World                    1309425248000
##  2 Germany                   197463838000
##  3 Switzerland               183296295000
##  4 France                    123482590000
##  5 Belgium                   117396245000
##  6 United States of America  114868001000
##  7 United Kingdom            110713945000
##  8 Ireland                    96686923000
##  9 Italy                      76012918000
## 10 Netherlands                59659401000
## # … with 210 more rows

Exploring Data

Lets plot the countries which were among the top 5 exporters each year and each of their performance over these 5 years.

top5ExportersByYear = drugs %>% 
  filter(Exporter!="World") %>% 
  group_by(Year) %>% 
  top_n(5, `Exports (USD)`) %>% 
  ungroup()
g = ggplot(data = top5ExportersByYear, aes(x = Year, y = `Exports (USD)`))
g + geom_line(aes(color = Exporter)) + labs(title = 'Trend for top 5 exporters in the world over the years')

Evaluating Top 10 exporters per Year. Excluding 2017 since we do not have numbers for total export in the world.

getTop10ForYear = function(df){
  top10ForYear = df %>% 
    filter(Exporter!="World") %>% 
    top_n(10, `Exports (USD)`)
  
  othersExports = (df %>% filter(Exporter=="World") %>% select(`Exports (USD)`)) - (top10ForYear %>% summarise(Total = sum(`Exports (USD)`)))
  
  YEAR = df %>% select(Year) %>% unique() %>% .$Year
  top10ForYear = top10ForYear %>% 
    add_row(Exporter = "Others", Year = YEAR, `Exports (USD)` = othersExports %>% .$`Exports (USD)`)
  return(top10ForYear)
}
yearlyTop10s = drugs %>% 
  filter(Year != 2017) %>% 
  group_by(Year) %>% 
  do(getTop10ForYear(.))
yearlyTop10s
## # A tibble: 44 x 3
##    Exporter                  Year `Exports (USD)`
##    <chr>                    <dbl>           <dbl>
##  1 Germany                   2013     48493611000
##  2 Switzerland               2013     32337891000
##  3 Belgium                   2013     33329615000
##  4 France                    2013     27848920000
##  5 United States of America  2013     23098676000
##  6 United Kingdom            2013     20885936000
##  7 Ireland                   2013     18152573000
##  8 Italy                     2013     20898532000
##  9 Netherlands               2013     13480651000
## 10 India                     2013     10313989000
## # … with 34 more rows

Writing function to plot a Donut Chart for each year showing percentage export contribution for top 10 exporters of that year as compared to all others.

plotTop10 = function(df){
  YEAR = df %>% select(Year) %>% unique() %>% .$Year
  plotTitle = paste("World Medicine Export in", YEAR, sep = " ")
  
  df = df %>% 
    mutate(tot = sum(`Exports (USD)`),
           prop = round(100*`Exports (USD)`/tot,2))
  
  p = ggplot(df, aes(x=2, y=prop, fill=Exporter)) + 
    geom_bar(stat="identity") + 
    geom_text( aes(label = prop), position = position_stack(vjust = 0.5)) + 
    xlim(0.5, 2.5) +
    coord_polar(theta = "y") +
    labs(x=NULL, y=NULL) + 
    labs(fill="") + 
    ggtitle(plotTitle) + 
    theme_bw() + 
    theme(plot.title = element_text(face="bold",family=c("sans"),size=15),
          legend.text=element_text(size=10),
          axis.ticks=element_blank(),
          axis.text=element_blank(),
          axis.title=element_blank(),
          panel.grid=element_blank(),
          panel.border=element_blank())
  return(p)
}

Plotting the donuts for each year

plotTop10(yearlyTop10s %>% filter(Year==2013))

plotTop10(yearlyTop10s %>% filter(Year==2014))

plotTop10(yearlyTop10s %>% filter(Year==2015))

plotTop10(yearlyTop10s %>% filter(Year==2016))

As can be seen Germany remains the biggest exporter of Drugs and Medicines over the past 5 years.