Merge pull request #225 from StructuralEquationModels/simulation-wrapper

Maximilian-Stefan-Ernst · web-flow · commit 8b0322f3cf90 · 2024-11-21T12:48:26.000+01:00
Add data simulation function
diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl
@@ -6,7 +6,7 @@
 Return a new model with swaped observed part.
 
 # Arguments
-- `model::AbstractSemSingle`: optimization algorithm.
+- `model::AbstractSemSingle`: model to swap the observed part of.
 - `kwargs`: additional keyword arguments; typically includes `data = ...`
 - `observed`: Either an object of subtype of `SemObserved` or a subtype of `SemObserved`
 
@@ -98,3 +98,44 @@ function update_observed(loss::SemLoss, new_observed; kwargs...)
     )
     return SemLoss(new_functions, loss.weights)
 end
+
+############################################################################################
+# simulate data
+############################################################################################
+"""
+    (1) rand(model::AbstractSemSingle, params, n)
+
+    (2) rand(model::AbstractSemSingle, n)
+
+Sample normally distributed data from the model-implied covariance matrix and mean vector.
+
+# Arguments
+- `model::AbstractSemSingle`: model to simulate from.
+- `params`: parameter values to simulate from.
+- `n::Integer`: Number of samples.
+
+# Examples
+```julia
+rand(model, start_simple(model), 100)
+```
+"""
+function Distributions.rand(
+    model::AbstractSemSingle{O, I, L, D},
+    params,
+    n::Integer,
+) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    update!(EvaluationTargets{true, false, false}(), model.imply, model, params)
+    return rand(model, n)
+end
+
+function Distributions.rand(
+    model::AbstractSemSingle{O, I, L, D},
+    n::Integer,
+) where {O, I <: Union{RAM, RAMSymbolic}, L, D}
+    if MeanStruct(model.imply) === NoMeanStruct
+        data = permutedims(rand(MvNormal(Symmetric(model.imply.Σ)), n))
+    elseif MeanStruct(model.imply) === HasMeanStruct
+        data = permutedims(rand(MvNormal(model.imply.μ, Symmetric(model.imply.Σ)), n))
+    end
+    return data
+end
diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
@@ -1,4 +1,5 @@
 using Statistics: cov, mean
+using Random
 
 ############################################################################################
 ### models w.o. meanstructure
@@ -161,6 +162,44 @@ end
     )
 end
 
+############################################################################################
+### data simulation
+############################################################################################
+
+@testset "data_simulation_wo_mean" begin
+    # parameters to recover
+    params = start_simple(
+        model_ml;
+        start_loadings = 0.5,
+        start_regressions = 0.5,
+        start_variances_observed = 0.5,
+        start_variances_latent = 1.0,
+        start_covariances_observed = 0.2,
+    )
+    # set seed for simulation
+    Random.seed!(83472834)
+    colnames = Symbol.(names(example_data("political_democracy")))
+    # simulate data
+    model_ml_new = swap_observed(
+        model_ml,
+        data = rand(model_ml, params, 1_000_000),
+        specification = spec,
+        obs_colnames = colnames,
+    )
+    model_ml_sym_new = swap_observed(
+        model_ml_sym,
+        data = rand(model_ml_sym, params, 1_000_000),
+        specification = spec,
+        obs_colnames = colnames,
+    )
+    # fit models
+    sol_ml = solution(sem_fit(model_ml_new))
+    sol_ml_sym = solution(sem_fit(model_ml_sym_new))
+    # check solution
+    @test maximum(abs.(sol_ml - params)) < 0.01
+    @test maximum(abs.(sol_ml_sym - params)) < 0.01
+end
+
 ############################################################################################
 ### test hessians
 ############################################################################################
@@ -332,6 +371,47 @@ end
     )
 end
 
+############################################################################################
+### data simulation
+############################################################################################
+
+@testset "data_simulation_with_mean" begin
+    # parameters to recover
+    params = start_simple(
+        model_ml;
+        start_loadings = 0.5,
+        start_regressions = 0.5,
+        start_variances_observed = 0.5,
+        start_variances_latent = 1.0,
+        start_covariances_observed = 0.2,
+        start_means = 0.5,
+    )
+    # set seed for simulation
+    Random.seed!(83472834)
+    colnames = Symbol.(names(example_data("political_democracy")))
+    # simulate data
+    model_ml_new = swap_observed(
+        model_ml,
+        data = rand(model_ml, params, 1_000_000),
+        specification = spec,
+        obs_colnames = colnames,
+        meanstructure = true,
+    )
+    model_ml_sym_new = swap_observed(
+        model_ml_sym,
+        data = rand(model_ml_sym, params, 1_000_000),
+        specification = spec,
+        obs_colnames = colnames,
+        meanstructure = true,
+    )
+    # fit models
+    sol_ml = solution(sem_fit(model_ml_new))
+    sol_ml_sym = solution(sem_fit(model_ml_sym_new))
+    # check solution
+    @test maximum(abs.(sol_ml - params)) < 0.01
+    @test maximum(abs.(sol_ml_sym - params)) < 0.01
+end
+
 ############################################################################################
 ### fiml
 ############################################################################################
diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl
@@ -60,7 +60,7 @@ imply_ml.Σ_function(imply_ml.Σ, true_val)
 true_dist = MultivariateNormal(imply_ml.Σ)
 
 Random.seed!(1234)
-x = transpose(rand(true_dist, 100000))
+x = transpose(rand(true_dist, 100_000))
 semobserved = SemObservedData(data = x, specification = nothing)
 
 loss_ml = SemLoss(SemML(; observed = semobserved, nparams = length(start)))