nycflights13
flights
datavignettes/chisq_test.Rmd
chisq_test.Rmd
Note: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and `hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
library(nycflights13)
library(dplyr)
library(ggplot2)
library(stringr)
library(infer)
# Fix the RNG seed so the random 500-row sample below is reproducible.
set.seed(2017)

# Build the working data set: drop rows of `flights` containing any NA,
# draw 500 rows at random, derive two categorical variables, and keep only
# the six columns used in the examples.
fli_small <- flights %>%
  na.omit() %>%
  sample_n(size = 500) %>%
  mutate(
    # October through March is labelled "winter"; April through September
    # is labelled "summer".
    season = case_when(
      month %in% c(1:3, 10:12) ~ "winter",
      month %in% 4:9 ~ "summer"
    ),
    # Hours 1-12 are labelled "morning"; hours 13-24 are "not morning".
    day_hour = case_when(
      between(hour, 1, 12) ~ "morning",
      between(hour, 13, 24) ~ "not morning"
    )
  ) %>%
  select(arr_delay, dep_delay, season, day_hour, origin, carrier)
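The variables kept in `fli_small` are:

- Two numeric variables: `arr_delay`, `dep_delay`
- Two variables with two categories each: `season` (`"winter"`, `"summer"`) and `day_hour` (`"morning"`, `"not morning"`)
- One variable with three categories: `origin` (`"EWR"`, `"JFK"`, `"LGA"`)
- One variable with multiple categories: `carrier`
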
The recommended approach is to use `specify() %>% calculate()`:
# Observed chi-squared statistic for the association between origin and
# season. The named arguments are the equivalent of the formula
# `origin ~ season`.
obs_chisq <- fli_small %>%
  specify(response = origin, explanatory = season) %>%
  calculate(stat = "Chisq")
The observed \(\chi^2\) statistic is 0.571898.
Or using `chisq_test()` in `infer`:
# Same observed statistic, obtained from the chisq_test() wrapper: run the
# test on fli_small and keep only the `statistic` column of its result.
obs_chisq <- dplyr::select(
  chisq_test(fli_small, formula = origin ~ season),
  statistic
)
Again, the observed \(\chi^2\) statistic is 0.571898.
Or using another shortcut function in `infer`:
# Same observed statistic once more, via the chisq_stat() shortcut.
obs_chisq <- chisq_stat(fli_small, formula = origin ~ season)
Lastly, the observed \(\chi^2\) statistic is 0.571898.
# Null distribution of the chi-squared statistic under independence of
# origin and season: 1000 permutation replicates, one statistic per
# replicate. The named arguments are the equivalent of `origin ~ season`.
chisq_null_distn <- fli_small %>%
  specify(response = origin, explanatory = season) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")
# Plot the null distribution together with the observed statistic.
visualize(chisq_null_distn, obs_stat = obs_chisq, direction = "greater")

# p-value computed from the null distribution, using the same observed
# statistic and direction as the plot above.
get_pvalue(chisq_null_distn, obs_stat = obs_chisq, direction = "greater")
## # A tibble: 1 x 1
## p_value
## <dbl>
## 1 0.748
# Visualize using the theoretical method, with the observed statistic
# marked. The named arguments are the equivalent of `origin ~ season`.
fli_small %>%
  specify(response = origin, explanatory = season) %>%
  hypothesize(null = "independence") %>%
  # No generate() step here: it is not used for the theoretical method.
  calculate(stat = "Chisq") %>%
  visualize(
    method = "theoretical",
    obs_stat = obs_chisq,
    direction = "right"
  )
## Warning: Check to make sure the conditions have been met for the
## theoretical method. {infer} currently does not check these for you.
# Visualize with method = "both": 1000 permutation replicates are generated
# and passed to visualize() along with the observed statistic. The named
# arguments are the equivalent of `origin ~ season`.
fli_small %>%
  specify(response = origin, explanatory = season) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq") %>%
  visualize(
    method = "both",
    obs_stat = obs_chisq,
    direction = "right"
  )
## Warning: Check to make sure the conditions have been met for the
## theoretical method. `infer` currently does not check these for you.
# p-value reported by chisq_test(), extracted from the `p_value` column as a
# plain vector.
dplyr::pull(
  chisq_test(fli_small, formula = origin ~ season),
  p_value
)
## [1] 0.7513009