library(tidyverse)
# Shared settings used by the chunks in this document.
nmax <- 300    # largest sample size n on the grid
alpha <- 0.05  # upper-tail probability handed to qf()

# One row per sample size n = 2, 3, ..., nmax.
df_n <- tibble(n = seq(2, nmax))

# p2 = F / (n + F), where F is the upper-alpha quantile of the F distribution
# with (2, 2n) degrees of freedom.
# NOTE(review): this looks like the upper confidence limit for the case where
# zero successes are observed -- confirm against the section heading.
df0 <- df_n |>
  mutate(
    p2 = qf(alpha, 2, 2 * n, lower.tail = FALSE) /
      (n + qf(alpha, 2, 2 * n, lower.tail = FALSE))
  )

# Plot p2 against the sample size.
ggplot(df0, aes(x = n, y = p2)) +
  geom_point()
1 母集団比率の区間推定
\(\hat{p}=0\)の場合
\(\hat{p}\neq 0\)の場合
# Number of observed successes used for the interval below.
x <- 1

# Two-sided interval limits for each n on the grid, built from F quantiles:
#   F1: upper alpha/2 quantile of F(2(n - x + 1), 2x)   -> lower limit p1
#   F2: upper alpha/2 quantile of F(2(x + 1), 2(n - x)) -> upper limit p2
# Relies on `df_n` and `alpha` defined earlier in the script.
df2 <- df_n |>
  mutate(
    F1 = qf(alpha / 2, 2 * (n - x + 1), 2 * x, lower.tail = FALSE),
    F2 = qf(alpha / 2, 2 * (x + 1), 2 * (n - x), lower.tail = FALSE),
    p1 = x / ((n - x + 1) * F1 + x),
    p2 = (x + 1) * F2 / ((x + 1) * F2 + (n - x))
  )

# Plot the upper limit p2 against the sample size.
ggplot(df2, aes(x = n, y = p2)) +
  geom_point()
2 比率の検定
- \(H_0:p=p_0,H_1:p>p_0\)の場合
# Grid of hypothesised proportions p = 0, 0.01, ..., pmax.
pmax <- 0.95
df_p <- tibble(p = seq(0, pmax, 0.01))

# Every (n, p) combination; relies on `df_n` defined earlier in the script.
df1 <- cross_join(df_n, df_p)

# Number of observed successes.
x <- 1

# For each (n, p): compare the statistic f2 with the upper-alpha quantile of
# F(c, d), where c = 2(n - x + 1) and d = 2x.
# At p = 0 the statistic divides by zero and becomes Inf, so that row is
# flagged as significant.
df3 <- df1 |>
  mutate(
    c = 2 * (n - x + 1),
    d = 2 * x,
    f2 = d * (1 - p) / (c * p),
    F2 = qf(alpha, c, d, lower.tail = FALSE),
    significant = (f2 >= F2)
  )

# Show which (n, p) cells are flagged.
ggplot(df3, aes(x = n, y = p, colour = significant)) +
  geom_point()
- \(H_0:p=p_0,H_1:p\neq p_0\)の場合
# Number of observed successes.
x <- 1

# Two-sided check on the (n, p) grid `df1`: a cell is flagged when either
# statistic reaches its upper alpha/2 F quantile.
#   f1 vs F(a, b), with a = 2(x + 1),     b = 2(n - x)
#   f2 vs F(c, d), with c = 2(n - x + 1), d = 2x
# Relies on `df1` and `alpha` defined earlier in the script.
df4 <- df1 |>
  mutate(
    a = 2 * (x + 1),
    b = 2 * (n - x),
    f1 = b * p / (a * (1 - p)),
    c = 2 * (n - x + 1),
    d = 2 * x,
    f2 = d * (1 - p) / (c * p),
    F1 = qf(alpha / 2, a, b, lower.tail = FALSE),
    F2 = qf(alpha / 2, c, d, lower.tail = FALSE),
    significant = (f1 >= F1) | (f2 >= F2)
  )

# Show which (n, p) cells are flagged.
ggplot(df4, aes(x = n, y = p, colour = significant)) +
  geom_point()
- \(H_0:p=p_0,H_1:p<p_0\)の場合
# This reassigns the global `alpha`, so any code run after this point sees the
# halved value.
alpha <- 0.05 / 2

# Number of observed successes.
x <- 1

# One-sided check on the (n, p) grid `df1`: a cell is flagged when f1 reaches
# the upper-alpha quantile of F(a, b), with a = 2(x + 1) and b = 2(n - x).
df5 <- df1 |>
  mutate(
    a = 2 * (x + 1),
    b = 2 * (n - x),
    f1 = b * p / (a * (1 - p)),
    F1 = qf(alpha, a, b, lower.tail = FALSE),
    significant = (f1 >= F1)
  )

# Show which (n, p) cells are flagged.
ggplot(df5, aes(x = n, y = p, colour = significant)) +
  geom_point()
上記の境界線を計算して\(\hat{p}\neq 0\)の場合の信頼区間上限と比較する
# For each n, the largest grid value of p that is NOT flagged in `df5`.
# The grid for p stops at 0.95 in steps of 0.01, so this boundary is only
# resolved to that step and cannot exceed 0.95.
df6 <- df5 |>
  filter(!significant) |>
  group_by(n) |>
  summarise(pmax = max(p))

# Overlay the boundary (black) on the upper limit p2 from `df2` (red).
ggplot(df6, aes(x = n, y = pmax)) +
  geom_point(colour = "black") +
  geom_point(data = df2, aes(x = n, y = p2), colour = "red", alpha = 0.5)
概ね一致している。
3 参考文献
4 R
# Print the version string of the running R session.
R.version.string
[1] "R version 4.4.0 (2024-04-24)"