global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?global_GDP_per_cap <- global_economy %>%
mutate(GDP_per_cap = GDP/Population) %>%
select(Country, Year, GDP_per_cap) %>%
group_by(Country)
autoplot(global_GDP_per_cap, GDP_per_cap) +
theme(legend.position="none")
## `mutate_if()` ignored the following grouping variables:
## Column `Country`
## Warning: Removed 3242 row(s) containing missing values (geom_path).
global_GDP_per_cap %>%
filter(Year == 2017) %>%
arrange(desc(GDP_per_cap))
## # A tsibble: 262 x 3 [1Y]
## # Key: Country [262]
## # Groups: Country [262]
## Country Year GDP_per_cap
## <fct> <dbl> <dbl>
## 1 Luxembourg 2017 104103.
## 2 Macao SAR, China 2017 80893.
## 3 Switzerland 2017 80190.
## 4 Norway 2017 75505.
## 5 Iceland 2017 70057.
## 6 Ireland 2017 69331.
## 7 Qatar 2017 63249.
## 8 United States 2017 59532.
## 9 North America 2017 58070.
## 10 Singapore 2017 57714.
## # ... with 252 more rows
It appears Luxembourg has the highest GDP per cap. It appears they emerged as a leader in the early 80s and their GDP peaked around 2015.
global_economyglobal_economy %>%
mutate(GDP_per_cap = GDP/Population) %>%
select(Country, Year, GDP, GDP_per_cap) %>%
filter(Country == 'United States') %>%
pivot_longer(c(GDP, GDP_per_cap),
values_to = 'GDP') %>%
ggplot(aes(x=Year, y=GDP)) +
geom_line() +
facet_grid(name ~ ., scales='free_y') +
labs(y='US$',
x='',
title='GDP vs GDP Per Capita: USA')
I assumed that a population transformation would make sense here, but the graph did not change much besides the y-axis scale.
aus_livestockvictorian_bulls <- aus_livestock %>%
filter(Animal == 'Bulls, bullocks and steers' & State == 'Victoria')
autoplot(victorian_bulls,Count) +
labs(y='Total Slaughtered',
x='',
title='Slaughter of Victorian Bulls, Bullocks, and Steers')
# box cox transformation
lambda <- victorian_bulls %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
victorian_bulls %>%
autoplot(box_cox(Count, lambda)) +
labs(y='Slaughtered Count (log)',
x='',
title='BoxCox Transform Slaughter of Victorian Bulls, Bullocks, and Steers')
Box-cox performed here to make the variation over time more consistent
vic_elec.autoplot(vic_elec, Demand)
avg_elec_demand_daily <- aggregate(vic_elec['Demand'], by=vic_elec['Date'], mean)
avg_elec_demand_daily %>%
as_tsibble(index=Date) %>%
autoplot(Demand)
Transforming from half hour increments to the average daily makes the graph more readable and the data easier to work with while retaining its shape. Could also potentially use a box_cox transform
aus_productionautoplot(aus_production, Gas)+
labs(y='',
title='Gas Production with No Tranform')
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
Variance over time has been standardized with Box Cox tranform
canadian_gas data?autoplot(canadian_gas, Volume)
While the variance changes over time, variance does not increase consistently over time with the increase in level. Variance increases and then decreases around 1991.
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover)
myseries %>%
autoplot(box_cox(Turnover, lambda=0))
If I had to choose a lambda manually between 0 and 1, I would choose the natural log or 0 for this series, though a power calculation using .1 also seems to make the series more consistent.
aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.I’m only going to be doing one of these since it is the same code for every box_cox
autoplot(aus_production, Tobacco)
## Warning: Removed 24 row(s) containing missing values (geom_path).
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Tobacco Production with $\\lambda$ = ",
round(lambda,2))))
## Warning: Removed 24 row(s) containing missing values (geom_path).
apple_stock <- gafa_stock %>%
select(Adj_Close) %>%
filter(Symbol == "AAPL" & year(Date) == 2014)
apple_stock %>%
head(7) %>%
mutate(
`5-MA` = slider::slide_dbl(Adj_Close, mean, .before=2, .after=2, .complete=TRUE),
`3x5-MA` = slider::slide_dbl(`5-MA`, mean, .before=1, .after=1, .complete=TRUE)
)
## # A tsibble: 7 x 5 [!]
## # Key: Symbol [1]
## Adj_Close Date Symbol `5-MA` `3x5-MA`
## <dbl> <date> <chr> <dbl> <dbl>
## 1 67.0 2014-01-02 AAPL NA NA
## 2 65.5 2014-01-03 AAPL NA NA
## 3 65.9 2014-01-06 AAPL 65.9 NA
## 4 65.4 2014-01-07 AAPL 65.5 65.6
## 5 65.8 2014-01-08 AAPL 65.3 NA
## 6 65.0 2014-01-09 AAPL NA NA
## 7 64.5 2014-01-10 AAPL NA NA
This is how to construct a 3x5MA, but I’m unsure how to prove the weights.
aus_production.
gas <- tail(aus_production, 5*4) %>%
select(Gas) # data is quarterly hence the suggested "5 X 4" for 5 yrs
autoplot(gas, Gas)
Seasonal fluctuation is pretty obvious here surging in the summer.
# Classical multiplicative decomp
gas %>%
model(
classical_decomposition(Gas, type = 'multiplicative')
) %>%
components() %>%
autoplot() +
labs(title = 'Classical Multiplicative Decomposition of Aus Gas Production')
## Warning: Removed 2 row(s) containing missing values (geom_path).
Results of the decomp seem to support the conclusions for part a
# plotting seasonally adjusted component
gas %>%
model(
classical_decomposition(Gas, type = 'multiplicative')
) %>%
components() %>%
ggplot(aes(x=Quarter)) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted"))
# adding 300 to an observation
gas$Gas[[10]] <- gas$Gas[[10]] + 300
gas %>%
model(
classical_decomposition(Gas, type = 'multiplicative')
) %>%
components() %>%
ggplot(aes(x=Quarter)) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted"))
Outlier doesn’t affect the seasonality as it is still visible once seasonally adjusted. It does not seem to matter where in the time series the outlier is.
set.seed(222)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components() %>%
autoplot()
Decomposing by X-11 shows a change in the seasonality over time, but no other outliers or unusual features that went unnoticed previously.
autoplot(canadian_gas, Volume)
gg_subseries(canadian_gas, Volume)
gg_season(canadian_gas, Volume)
Variance of the data fluctuates over time. Subseries plot shows lagging volume in the 70’s and 80s before continuing its upward trend.
canadian_gas %>%
model(STL(Volume ~ trend(window = 21) +
season(window=12),
robust = TRUE)) %>%
components() %>%
autoplot()
We can see seasonal shifts over time
canadian_gas %>%
model(STL(Volume ~ trend(window = 21) +
season(window=12),
robust = TRUE)) %>%
components() %>%
ggplot(aes(x=Month)) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted"))
Above is a plausible seasonally adjusted series
canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components() %>%
autoplot()
canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
components() %>%
autoplot()
I think the X-11 does a better job than the SEATS or STL decomp. It picks up on the fluctuation of seasonality and the residual appears to have less white noise.