library(IRTest)
#> Thank you for using IRTest!
#> Please cite the package as:
#> Li, S. (2022). IRTest: Parameter estimation of item response theory with estimation of latent distribution (Version 1.0.0). R package.
#>
#> URL: https://CRAN.R-project.org/package=IRTest
library(ggplot2)
The function `DataGeneration` can be used for the pre-analysis step.
This function returns a set of artificial data and some useful objects
for analysis (e.g., `theta`, `data_D`, `item_D`, & `initialitem_D`).
In the parameter estimation process, `initialitem_D` can be used as an
input to the function `IRTest_Dich` (i.e., `initialitem = initialitem_D`).
`data_D` is an artificial item response data set that could be used for
analyses such as computer simulations, but it is unnecessary if
user-imported item response data are used. `theta` and `item_D` are not
used in the estimation process, but they can serve as the true parameters
if the artificial data (`data_D`) are used for an analysis.
# Generate an artificial data set with 10 dichotomous items and no
# polytomous items for 500 respondents. The seed is fixed so the example is
# reproducible.
Alldata <- DataGeneration(seed = 123456789,
                          model_D = rep(1:2, each = 5),
                          N = 500,
                          nitem_D = 10,
                          nitem_P = 0,
                          latent_dist = "2NM",
                          d = 1.664,
                          sd_ratio = 2,
                          prob = 0.3)

# Pull out the pieces used below: the response data, the generating item
# parameters, the starting values for estimation, and the generating
# ability parameters.
data <- Alldata$data_D
item <- Alldata$item_D
initialitem <- Alldata$initialitem_D
theta <- Alldata$theta
If the artificial data (`data_D`) are used, the true latent distribution
looks like the following:
###### ######
###### Empirical histogram method ######
###### ######
# Fit the dichotomous-item model and store the result in Mod1.
# `initialitem` supplies the starting item parameters; `latent_dist = "EHM"`
# selects the empirical histogram method for the latent distribution.
# Estimation stops at `max_iter` iterations or once the convergence
# `threshold` is reached.
Mod1 <- IRTest_Dich(initialitem = initialitem,
                    data = data,
                    model = rep(1:2, each = 5),
                    latent_dist = "EHM",
                    max_iter = 200,
                    threshold = .0001)
###### ######
###### Kernel density estimation method ######
###### ######
# Mod1 <- IRTest_Dich(initialitem = initialitem,
# data = data,
# model = rep(1:2, each=5),
# latent_dist = "KDE",
# bandwidth = "SJ-ste",
# max_iter = 200,
# threshold = .001)
###### ######
###### Normality assumption ######
###### ######
# Mod1 <- IRTest_Dich(initialitem = initialitem,
# data = data,
# model = rep(1:2, each=5),
# latent_dist = "Normal",
# max_iter = 200,
# threshold = .0001)
###### ######
###### Two-component Gaussian mixture distribution ######
###### ######
# Mod1 <- IRTest_Dich(initialitem = initialitem,
# data = data,
# model = rep(1:2, each=5),
# latent_dist = "Mixture",
# max_iter = 200,
# threshold = .0001)
###### ######
###### Davidian curve (for an arbitrarily chosen case of h=4)######
###### ######
# Mod1 <- IRTest_Dich(initialitem = initialitem,
# data = data,
# model = rep(1:2, each=5),
# latent_dist = "DC",
# max_iter = 200,
# threshold = .0001,
# h=4)
### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# the number of parameters and items, and a text plot of the estimated
# latent distribution.
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 1e-04 on 54th iterations.
#>
#> Model Fit:
#> deviance 5763.024
#> AIC 6031.024
#> BIC 6595.781
#>
#> The Number of Parameters:
#> item 15
#> dist 119
#> total 134
#>
#> The Number of Items:
#> dichotomous 10
#> polyotomous 0
#>
#> The Estimated Latent Distribution:
#> method - EHM
#> ----------------------------------------
#>
#>
#> @
#> @ @ @ @ @
#> @ @ @ @ @ @ .
#> @ @ @ @ @ . @ @ @ @
#> @ @ @ @ @ @ . @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
# One row per item; columns a, b, and c.
Mod1$par_est
#> a b c
#> [1,] 1.000000 -0.74508109 0
#> [2,] 1.000000 0.51116617 0
#> [3,] 1.000000 0.80336947 0
#> [4,] 1.000000 0.53158883 0
#> [5,] 1.000000 -0.39455364 0
#> [6,] 1.700258 0.01012807 0
#> [7,] 1.526307 0.89420050 0
#> [8,] 2.203176 -1.12635131 0
#> [9,] 1.566285 0.39084407 0
#> [10,] 1.385190 0.76792824 0
### The asymptotic standard errors of item parameters
# Same layout as Mod1$par_est; NA marks entries with no standard error.
Mod1$se
#> a b c
#> [1,] NA 0.10304406 NA
#> [2,] NA 0.10098172 NA
#> [3,] NA 0.10355444 NA
#> [4,] NA 0.10112153 NA
#> [5,] NA 0.10034057 NA
#> [6,] 0.1446766 0.06711641 NA
#> [7,] 0.1537574 0.08561540 NA
#> [8,] 0.2434543 0.07258288 NA
#> [9,] 0.1404522 0.07275011 NA
#> [10,] 0.1388418 0.08841969 NA
### Estimated ability parameters (first few respondents)
head(Mod1$theta)
#> [1] -0.8551470 -0.6567867 -0.7198031 -1.0039922 -1.2714637 -0.7823374
### Plot of the estimated latent distribution, y-axis limited to [0, 0.5]
plot(Mod1) + lims(y = c(0, 0.5))
As in the case of dichotomous items, the function `DataGeneration` can be
used for the pre-analysis step. This function returns a set of artificial
data and some useful objects for analysis (e.g., `theta`, `data_P`,
`item_P`, & `initialitem_P`).
In the parameter estimation process, `initialitem_P` can be used as an
input to the function `IRTest_Poly` (i.e., `initialitem = initialitem_P`).
`data_P` is an artificial item response data set that could be used for
analyses such as computer simulations, but it is unnecessary if
user-imported item response data are used. `theta` and `item_P` are not
used in the estimation process, but they can serve as the true parameters
if the artificial data (`data_P`) are used for an analysis.
# Generate an artificial data set with 10 polytomous items and no
# dichotomous items for 1000 respondents. `categ` gives five items a value
# of 3 and five items a value of 7. The seed is fixed so the example is
# reproducible.
Alldata <- DataGeneration(seed = 123456789,
                          model_P = "GPCM",
                          categ = rep(c(3, 7), each = 5),
                          N = 1000,
                          nitem_D = 0,
                          nitem_P = 10,
                          latent_dist = "2NM",
                          d = 1.414,
                          sd_ratio = 2,
                          prob = 0.5)

# Pull out the pieces used below: the response data, the generating item
# parameters, the starting values for estimation, and the generating
# ability parameters.
data <- Alldata$data_P
item <- Alldata$item_P
initialitem <- Alldata$initialitem_P
theta <- Alldata$theta
If the artificial data (`data_P`) are used, the true latent distribution
looks like the following:
###### ######
###### Kernel density estimation method ######
###### ######
# Fit the polytomous-item model ("GPCM") and store the result in Mod1.
# `initialitem` supplies the starting item parameters; `latent_dist = "KDE"`
# selects the kernel density estimation method with the "SJ-ste" bandwidth.
# Estimation stops at `max_iter` iterations or once the convergence
# `threshold` is reached.
Mod1 <- IRTest_Poly(initialitem = initialitem,
                    data = data,
                    model = "GPCM",
                    latent_dist = "KDE",
                    bandwidth = "SJ-ste",
                    max_iter = 200,
                    threshold = .001)
###### ######
###### Normality assumption ######
###### ######
# Mod1 <- IRTest_Poly(initialitem = initialitem,
# data = data,
# model = "GPCM",
# latent_dist = "Normal",
# max_iter = 200,
# threshold = .001)
###### ######
###### Empirical histogram method ######
###### ######
# Mod1 <- IRTest_Poly(initialitem = initialitem,
# data = data,
# model = "GPCM",
# latent_dist = "EHM",
# max_iter = 200,
# threshold = .001)
###### ######
###### Two-component Gaussian mixture distribution ######
###### ######
# Mod1 <- IRTest_Poly(initialitem = initialitem,
# data = data,
# model = "GPCM",
# latent_dist = "Mixture",
# max_iter = 200,
# threshold = .001)
###### ######
###### Davidian curve (for an arbitrarily chosen case of h=4) ######
###### ######
# Mod1 <- IRTest_Poly(initialitem = initialitem,
# data = data,
# model = "GPCM",
# latent_dist = "DC",
# max_iter = 200,
# threshold = .001,
# h=4)
### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# the number of parameters and items, and a text plot of the estimated
# latent distribution.
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 0.001 on 39th iterations.
#>
#> Model Fit:
#> deviance 20359.2
#> AIC 20461.2
#> BIC 20711.5
#>
#> The Number of Parameters:
#> item 50
#> dist 1
#> total 51
#>
#> The Number of Items:
#> dichotomous 0
#> polyotomous 10
#>
#> The Estimated Latent Distribution:
#> method - KDE
#> ----------------------------------------
#>
#> . .
#> @ @ @ .
#> . @ @ @ @ .
#> @ @ @ @ @ @
#> . @ @ @ @ @ @ @ .
#> @ @ @ @ @ @ @ @ @ @ @ . .
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
# One row per item; column a followed by b_1, b_2, ... . NA fills the b
# columns an item does not use.
Mod1$par_est
#> a b_1 b_2 b_3 b_4 b_5
#> [1,] 1.9320419 0.396698909 0.46603651 NA NA NA
#> [2,] 1.6880632 -0.303763927 0.05188864 NA NA NA
#> [3,] 2.0504530 -0.376056390 -0.24941074 NA NA NA
#> [4,] 1.0605873 -0.159282350 0.16815184 NA NA NA
#> [5,] 0.7835049 0.003729855 0.22658723 NA NA NA
#> [6,] 1.8907281 0.253908544 0.29706486 0.5563451 0.56108999 0.7435219
#> [7,] 1.5439636 -1.603051232 -1.12357477 -1.0013097 -1.06426560 -0.7066684
#> [8,] 0.9479375 -0.332417438 -0.17196848 0.3086190 0.06595696 -0.2510836
#> [9,] 1.4230350 -0.784687384 -0.69487372 -0.6851528 -0.46813907 -0.5758896
#> [10,] 2.5827215 0.546921194 0.94939512 0.7099745 1.04467562 1.0309654
#> b_6
#> [1,] NA
#> [2,] NA
#> [3,] NA
#> [4,] NA
#> [5,] NA
#> [6,] 0.8505778
#> [7,] -0.6593098
#> [8,] 0.1465100
#> [9,] -0.4351178
#> [10,] 1.2435975
### The asymptotic standard errors of item parameters
# Same layout as Mod1$par_est; NA marks entries with no standard error.
Mod1$se
#> a b_1 b_2 b_3 b_4 b_5
#> [1,] 0.11560425 0.06407208 0.06475432 NA NA NA
#> [2,] 0.09873818 0.05692682 0.05873271 NA NA NA
#> [3,] 0.12254146 0.05222493 0.05290338 NA NA NA
#> [4,] 0.06700339 0.08275252 0.08438107 NA NA NA
#> [5,] 0.05524448 0.10839246 0.10990234 NA NA NA
#> [6,] 0.11338544 0.06983861 0.08428678 0.1076463 0.11530990 0.10459915
#> [7,] 0.09071435 0.14185389 0.12458750 0.1208955 0.10620807 0.08435877
#> [8,] 0.05380619 0.11427616 0.13330931 0.1692535 0.18844552 0.17129060
#> [9,] 0.08124190 0.10135122 0.11385157 0.1183858 0.11737822 0.10510023
#> [10,] 0.16806178 0.05840518 0.08680555 0.1003863 0.09807777 0.09198491
#> b_6
#> [1,] NA
#> [2,] NA
#> [3,] NA
#> [4,] NA
#> [5,] NA
#> [6,] 0.08789139
#> [7,] 0.07567987
#> [8,] 0.13258224
#> [9,] 0.08666445
#> [10,] 0.07509506
### Estimated ability parameters (first few respondents)
head(Mod1$theta)
#> [1] -0.5375323 -0.5787214 -0.2605974 -1.0428218 -0.9306040 -1.2750381
### Plot of the estimated latent distribution, y-axis limited to [0, 0.5]
plot(Mod1) + lims(y = c(0, 0.5))
As in the case of dichotomous and polytomous items, the function
`DataGeneration` can be used for the pre-analysis step. This function
returns artificial data and some useful objects for analysis (i.e.,
`theta`, `data_D`, `item_D`, `initialitem_D`, `data_P`, `item_P`, &
`initialitem_P`).
In the parameter estimation process, `initialitem_D` and `initialitem_P`
can be used as inputs to the function `IRTest_Mix` (i.e.,
`initialitem_D = initialitem_D`, & `initialitem_P = initialitem_P`).
`data_D` and `data_P` are artificial item response data sets that could be
used for analyses such as computer simulations, but they are unnecessary
if user-imported item response data are used. `theta`, `item_D`, and
`item_P` are not used in the estimation process, but they can serve as the
true parameters if the artificial data (`data_D` & `data_P`) are used for
an analysis.
# Generate an artificial mixed-format data set with 5 dichotomous and
# 5 polytomous items for 1000 respondents. The seed is fixed so the example
# is reproducible.
Alldata <- DataGeneration(seed = 123456789,
                          model_D = rep(2, 5),
                          model_P = "GPCM",
                          categ = rep(3, 5),
                          N = 1000,
                          nitem_D = 5,
                          nitem_P = 5,
                          latent_dist = "2NM",
                          d = 1.664,
                          sd_ratio = 1,
                          prob = 0.5)

# Pull out the pieces used below. Names ending in D hold the dichotomous
# part and names ending in P hold the polytomous part: response data,
# generating item parameters, and starting values for estimation. `theta`
# holds the generating ability parameters.
DataD <- Alldata$data_D
DataP <- Alldata$data_P
itemD <- Alldata$item_D
itemP <- Alldata$item_P
initialitemD <- Alldata$initialitem_D
initialitemP <- Alldata$initialitem_P
theta <- Alldata$theta
If the artificial data (`data_D` & `data_P`) are used, the true latent
distribution looks like the following:
#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.
###### ######
###### Kernel density estimation method ######
###### ######
# Fit the mixed-format model and store the result in Mod1.
# The dichotomous and polytomous parts each get their own starting values,
# response data, and model specification. `latent_dist = "KDE"` selects the
# kernel density estimation method with the "SJ-ste" bandwidth. Estimation
# stops at `max_iter` iterations or once the convergence `threshold` is
# reached.
Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
                   initialitem_P = initialitemP,
                   data_D = DataD,
                   data_P = DataP,
                   model_D = rep(2, 5),
                   model_P = "GPCM",
                   latent_dist = "KDE",
                   bandwidth = "SJ-ste",
                   max_iter = 200,
                   threshold = .001)
###### ######
###### Normality assumption ######
###### ######
# Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
# initialitem_P = initialitemP,
# data_D = DataD,
# data_P = DataP,
# model_D = rep(2,5),
# model_P = "GPCM",
# latent_dist = "Normal",
# max_iter = 200,
# threshold = .001)
###### ######
###### Empirical histogram method ######
###### ######
# Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
# initialitem_P = initialitemP,
# data_D = DataD,
# data_P = DataP,
# model_D = rep(2,5),
# model_P = "GPCM",
# latent_dist = "EHM",
# max_iter = 200,
# threshold = .001)
###### ######
###### Two-component Gaussian mixture distribution ######
###### ######
# Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
# initialitem_P = initialitemP,
# data_D = DataD,
# data_P = DataP,
# model_D = rep(2,5),
# model_P = "GPCM",
# latent_dist = "Mixture",
# max_iter = 200,
# threshold = .001)
###### ######
###### Davidian curve (for an arbitrarily chosen case of h=4) ######
###### ######
# Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
# initialitem_P = initialitemP,
# data_D = DataD,
# data_P = DataP,
# model_D = rep(2,5),
# model_P = "GPCM",
# latent_dist = "DC",
# max_iter = 200,
# threshold = .001,
# h = 4)
### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# the number of parameters and items, and a text plot of the estimated
# latent distribution.
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 0.001 on 31th iterations.
#>
#> Model Fit:
#> deviance 2854157
#> AIC 2854209
#> BIC 2854337
#>
#> The Number of Parameters:
#> item 25
#> dist 1
#> total 26
#>
#> The Number of Items:
#> dichotomous 5
#> polyotomous 5
#>
#> The Estimated Latent Distribution:
#> method - KDE
#> ----------------------------------------
#>
#>
#>
#> . .
#> . . . . . @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
# A list with one matrix per item type: $Dichotomous and $Polytomous.
Mod1$par_est
#> $Dichotomous
#> a b c
#> [1,] 2.200954 0.9115074 0
#> [2,] 1.984903 -1.0332999 0
#> [3,] 1.110728 0.4980855 0
#> [4,] 1.258792 0.5392781 0
#> [5,] 2.287480 1.4436252 0
#>
#> $Polytomous
#> a b_1 b_2 b_3 b_4 b_5 b_6
#> [1,] 1.966701 0.3937469 0.42467910 NA NA NA NA
#> [2,] 2.007852 -0.3261131 0.06844343 NA NA NA NA
#> [3,] 2.056885 -0.3897486 -0.22236003 NA NA NA NA
#> [4,] 1.030497 -0.1731280 0.19751324 NA NA NA NA
#> [5,] 0.785141 0.2433940 0.08376259 NA NA NA NA
### The asymptotic standard errors of item parameters
# Same layout as Mod1$par_est; NA marks entries with no standard error.
Mod1$se
#> $Dichotomous
#> a b c
#> [1,] 0.15475720 0.04668629 NA
#> [2,] 0.14171212 0.05355501 NA
#> [3,] 0.08520384 0.06947386 NA
#> [4,] 0.09141200 0.06332382 NA
#> [5,] 0.19665345 0.06471224 NA
#>
#> $Polytomous
#> a b_1 b_2 b_3 b_4 b_5 b_6
#> [1,] 0.11688799 0.05981212 0.05958615 NA NA NA NA
#> [2,] 0.11456065 0.05304505 0.05249768 NA NA NA NA
#> [3,] 0.11996224 0.05626596 0.05611498 NA NA NA NA
#> [4,] 0.06432937 0.08543217 0.08539457 NA NA NA NA
#> [5,] 0.05401889 0.11481480 0.11424988 NA NA NA NA
### Estimated ability parameters (first few respondents)
head(Mod1$theta)
#> [1] -0.5775496 -0.7558405 0.6185433 -0.8691925 -1.3624809 -1.5623822
### Plot of the estimated latent distribution, y-axis limited to [0, 0.5]
plot(Mod1) + lims(y = c(0, 0.5))
—-