1. Ex

Load according to the following predefined parameters from two different sources (WITS and COMTRADE) and compare the results:

Questions:
1. Load both data sets to R under the names ctPolandTotals and witsPolandTotals
2. Compare two data sets using different functions such as: glimpse(), str(), nrows(), all_equal() or more from dplyr package. 3. Briefly describe what is the difference between two data sets.

1. Ex Answers

Step 1. general setup of the working area:

library(tidyverse)
  1. Loading CT data using the read_csv() because the are no customized function developed for reading regular CT data download. 0(Loading data after we manually loaded it from the website).
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   Year = col_integer(),
##   Period = col_integer(),
##   `Period Desc.` = col_integer(),
##   `Aggregate Level` = col_integer(),
##   `Is Leaf Code` = col_integer(),
##   `Trade Flow Code` = col_integer(),
##   `Reporter Code` = col_integer(),
##   `Partner Code` = col_integer(),
##   `Qty Unit Code` = col_integer(),
##   `Trade Value (US$)` = col_double(),
##   Flag = col_integer()
## )
## See spec(...) for full column specifications.
## Observations: 453
## Variables: 35
## $ Classification           <chr> "H4", "H4", "H4", "H4", "H4", "H4", "...
## $ Year                     <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ Period                   <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ `Period Desc.`           <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ `Aggregate Level`        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ `Is Leaf Code`           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ `Trade Flow Code`        <int> 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1...
## $ `Trade Flow`             <chr> "Import", "Export", "Import", "Export...
## $ `Reporter Code`          <int> 616, 616, 616, 616, 616, 616, 616, 61...
## $ Reporter                 <chr> "Poland", "Poland", "Poland", "Poland...
## $ `Reporter ISO`           <chr> "POL", "POL", "POL", "POL", "POL", "P...
## $ `Partner Code`           <int> 0, 0, 4, 4, 8, 8, 10, 12, 12, 16, 20,...
## $ Partner                  <chr> "World", "World", "Afghanistan", "Afg...
## $ `Partner ISO`            <chr> "WLD", "WLD", "AFG", "AFG", "ALB", "A...
## $ `2nd Partner Code`       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `2nd Partner`            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `2nd Partner ISO`        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Customs Proc. Code`     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Customs                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Mode of Transport Code` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Mode of Transport`      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Commodity Code`         <chr> "TOTAL", "TOTAL", "TOTAL", "TOTAL", "...
## $ Commodity                <chr> "All Commodities", "All Commodities",...
## $ `Qty Unit Code`          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ `Qty Unit`               <chr> "No Quantity", "No Quantity", "No Qua...
## $ Qty                      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Alt Qty Unit Code`      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Alt Qty Unit`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Alt Qty`                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Netweight (kg)`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Gross weight (kg)`      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Trade Value (US$)`      <dbl> 216687292345, 214476794173, 482913, 1...
## $ `CIF Trade Value (US$)`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `FOB Trade Value (US$)`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Flag                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...

Loading WITS data using the read_csv(). (Loading data also after we manually loaded it from the website).

witsPolandTotals <- read_csv("01-data-import/wits_poland_data.csv")
## Parsed with column specification:
## cols(
##   Nomenclature = col_character(),
##   ReporterISO3 = col_character(),
##   ProductCode = col_character(),
##   ReporterName = col_character(),
##   PartnerISO3 = col_character(),
##   PartnerName = col_character(),
##   Year = col_integer(),
##   TradeFlowName = col_character(),
##   TradeFlowCode = col_integer(),
##   `TradeValue in 1000 USD` = col_double(),
##   Quantity = col_character(),
##   QuantityToken = col_integer(),
##   QtyUnit = col_character(),
##   ProductDescription = col_character(),
##   `NetWeight in KGM` = col_character(),
##   `Reporter Region` = col_character(),
##   `Reporter Income Group` = col_character(),
##   `Partner Region` = col_character(),
##   `Partner Income Group` = col_character()
## )
glimpse(witsPolandTotals)
## Observations: 449
## Variables: 19
## $ Nomenclature             <chr> "H4", "H4", "H4", "H4", "H4", "H4", "...
## $ ReporterISO3             <chr> "POL", "POL", "POL", "POL", "POL", "P...
## $ ProductCode              <chr> "Total", "Total", "Total", "Total", "...
## $ ReporterName             <chr> "Poland", "Poland", "Poland", "Poland...
## $ PartnerISO3              <chr> "ABW", "ABW", "AFG", "AFG", "AGO", "A...
## $ PartnerName              <chr> "Aruba", "Aruba", "Afghanistan", "Afg...
## $ Year                     <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ TradeFlowName            <chr> "Export", "Import", "Export", "Import...
## $ TradeFlowCode            <int> 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6...
## $ `TradeValue in 1000 USD` <dbl> 512.927, 1.075, 12536.755, 482.913, 3...
## $ Quantity                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ QuantityToken            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ QtyUnit                  <chr> "N.Q.", "N.Q.", "N.Q.", "N.Q.", "N.Q....
## $ ProductDescription       <chr> "Total Trade", "Total Trade", "Total ...
## $ `NetWeight in KGM`       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `Reporter Region`        <chr> "Europe & Central Asia", "Europe & Ce...
## $ `Reporter Income Group`  <chr> "High income", "High income", "High i...
## $ `Partner Region`         <chr> "Latin America & Caribbean", "Latin A...
## $ `Partner Income Group`   <chr> "High income", "High income", "Low in...

It is clear, that both tables have different number of rows as well as sets of columns. Therefore, we need to go one step ahead and use dplyr package and commands select() and rename() to clean and unify variables across two data frames.

  1. Cleaning WITS data frame.
witsPolandTotals <- 
  witsPolandTotals %>% 
  select(Nomenclature, ReporterISO3, PartnerISO3, 
         ProductCode, Year, TradeFlowName, `TradeValue in 1000 USD`)
glimpse(witsPolandTotals)
## Observations: 449
## Variables: 7
## $ Nomenclature             <chr> "H4", "H4", "H4", "H4", "H4", "H4", "...
## $ ReporterISO3             <chr> "POL", "POL", "POL", "POL", "POL", "P...
## $ PartnerISO3              <chr> "ABW", "ABW", "AFG", "AFG", "AGO", "A...
## $ ProductCode              <chr> "Total", "Total", "Total", "Total", "...
## $ Year                     <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ TradeFlowName            <chr> "Export", "Import", "Export", "Import...
## $ `TradeValue in 1000 USD` <dbl> 512.927, 1.075, 12536.755, 482.913, 3...

Cleaning CT data frame.

ctPolandTotals <- 
  ctPolandTotals %>% 
  select(Classification, `Reporter ISO`, `Partner ISO`, 
         `Commodity Code`, Year, `Trade Flow`, `Trade Value (US$)`) %>% 
  rename(Nomenclature = Classification, 
         ReporterISO3 = `Reporter ISO`, 
         PartnerISO3 = `Partner ISO`, 
         ProductCode = `Commodity Code`, 
         Year = Year, 
         TradeFlowName = `Trade Flow`, 
         `TradeValue in 1000 USD` = `Trade Value (US$)`)
glimpse(ctPolandTotals)
## Observations: 453
## Variables: 7
## $ Nomenclature             <chr> "H4", "H4", "H4", "H4", "H4", "H4", "...
## $ ReporterISO3             <chr> "POL", "POL", "POL", "POL", "POL", "P...
## $ PartnerISO3              <chr> "WLD", "WLD", "AFG", "AFG", "ALB", "A...
## $ ProductCode              <chr> "TOTAL", "TOTAL", "TOTAL", "TOTAL", "...
## $ Year                     <int> 2014, 2014, 2014, 2014, 2014, 2014, 2...
## $ TradeFlowName            <chr> "Import", "Export", "Import", "Export...
## $ `TradeValue in 1000 USD` <dbl> 216687292345, 214476794173, 482913, 1...
  1. The differences are in the units of the TradeValue in both data sets, different number of rows, and coding for ProductCode.

Copyright © 2017 Eduard Bukin. All rights reserved.