PBS %>%
as_tibble()%>%
group_by(ATC1_desc) %>%
summarize(Avg_Cost = mean(Cost),
Std_Cost = sd(Cost)) %>%
arrange(desc(Avg_Cost))
## # A tibble: 15 x 3
## ATC1_desc Avg_Cost Std_Cost
## <chr> <dbl> <dbl>
## 1 Cardiovascular system 2594071. 6471171.
## 2 Nervous system 1985150. 3748857.
## 3 Antineoplastic and immunomodulating agents 1386001. 2178577.
## 4 Respiratory system 1308753. 3466564.
## 5 Alimentary tract and metabolism 924237. 3011654.
## 6 Musculo-skeletal system 780143. 2039964.
## 7 Antiinfectives for systemic use 734496. 1781448.
## 8 Genito urinary system and sex hormones 478357. 1107672.
## 9 Blood and blood forming organs 444450. 1616023.
## 10 Sensory organs 409280. 1246218.
## 11 Various 140458. 291258.
## 12 Dermatologicals 125144. 283694.
## 13 Systemic hormonal preparations, excl. sex hormones and ins~ 81698. 150833.
## 14 Antiparasitic products, insecticides and repellents 27123. 64686.
## 15 <NA> 15078. 45375.
PBS %>%
group_by(ATC1_desc) %>%
filter(ATC1_desc == 'Cardiovascular system') %>%
summarize(Total_Cost = sum(Cost)) %>%
autoplot(Total_Cost) +
labs(title = "Cardiovascular System Cost")
PBS %>%
group_by(ATC1_desc) %>%
filter(ATC1_desc == 'Antiparasitic products, insecticides and repellents') %>%
summarize(Total_Cost = sum(Cost)) %>%
autoplot(Total_Cost) +
labs(title = "Antiparasitic products, insecticides and repellents Cost")
holiday_series <- tourism %>%
filter(Purpose == 'Holiday')
holiday_series_features <- holiday_series %>%
features(Trips, feature_set(pkgs = 'feasts'))
## Warning: `n_flat_spots()` was deprecated in feasts 0.1.5.
## Please use `longest_flat_spot()` instead.
holiday_series_features %>%
select(seasonal_peak_year, seasonal_trough_year, State) %>%
mutate(
seasonal_peak_year = seasonal_peak_year +
4*(seasonal_peak_year == 0),
seasonal_trough_year = seasonal_trough_year +
4*(seasonal_trough_year == 0),
seasonal_peak_year = glue('Q{seasonal_peak_year}'),
seasonal_trough_year =glue('Q{seasonal_trough_year}'),
) %>%
ggpairs(mapping = aes(color = State)) +
theme(axis.text.x.bottom = element_text(angle=45, hjust=1))
ACT, NSW, South Australia, Tasmania, Victoria, Western Australia peak in Q1 Nothern Territory and Queensland peak in Q3
PBS
## # A tsibble: 67,596 x 9 [1M]
## # Key: Concession, Type, ATC1, ATC2 [336]
## Month Concession Type ATC1 ATC1_desc ATC2 ATC2_desc Scripts Cost
## <mth> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1991 Jul Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 18228 67877
## 2 1991 Aug Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 15327 57011
## 3 1991 Sep Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 14775 55020
## 4 1991 Oct Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 15380 57222
## 5 1991 Nov Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 14371 52120
## 6 1991 Dec Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 15028 54299
## 7 1992 Jan Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 11040 39753
## 8 1992 Feb Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 15165 54405
## 9 1992 Mar Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 16898 61108
## 10 1992 Apr Concession~ Co-pa~ A Alimentary~ A01 STOMATOLOG~ 18141 65356
## # ... with 67,586 more rows
pbs_features <- PBS %>%
features(Scripts, feature_set(pkgs = 'feasts'))
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
## zero-variance series
## Error in ar.burg.default(x, aic = aic, order.max = order.max, na.action = na.action, :
## zero-variance series
## Warning: 2 errors (1 unique) encountered for feature 7
## [2] subscript out of bounds
pbs_features <- pbs_features %>%
drop_na()
pca <- pbs_features %>%
select(-Concession, -Type, -ATC1, -ATC2) %>%
prcomp(scale = TRUE) %>%
augment(pbs_features)
pca %>%
ggplot(aes(x = .fittedPC1, y = .fittedPC2, col = ATC1)) +
geom_point() +
theme(aspect.ratio = 1)
We can see there are outliers here in the bottom right corner.
outliers <- pca %>%
filter(.fittedPC2 < -10) %>%
select(Concession, Type, ATC1)
outliers %>%
left_join(PBS, by = c("Concession", "Type","ATC1")) %>%
mutate(
Series = glue("{Concession}","{Type}","{ATC1}", .sep = '\n\n')
) %>%
ggplot(aes(x = Month, y=Cost)) +
geom_line() +
facet_grid(Series ~ ., scales = 'free') +
labs(title = "PCA Outliers for PBS")
I have tried the cost and the total scripts variables and I am still unsure as to why these are outliers. Possible reasons: