diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl
index a032ab724..944542379 100644
--- a/src/StructuralEquationModels.jl
+++ b/src/StructuralEquationModels.jl
@@ -95,6 +95,12 @@ export AbstractSem,
     Sem,
     SemFiniteDiff,
     SemEnsemble,
+    MeanStruct,
+    NoMeanStruct,
+    HasMeanStruct,
+    HessianEval,
+    ExactHessian,
+    ApproxHessian,
     SemImply,
     RAMSymbolic,
     RAM,
diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl
index 138ae431e..be559b0d9 100644
--- a/src/additional_functions/helper.jl
+++ b/src/additional_functions/helper.jl
@@ -33,15 +33,7 @@ function semvec(observed, imply, loss, optimizer)
     return sem_vec
 end
 
-function get_observed(rowind, data, semobserved; args = (), kwargs = NamedTuple())
-    observed_vec = Vector{semobserved}(undef, length(rowind))
-    for i in 1:length(rowind)
-        observed_vec[i] = semobserved(args...; data = Matrix(data[rowind[i], :]), kwargs...)
-    end
-    return observed_vec
-end
-
-skipmissing_mean(mat::AbstractMatrix) = 
+skipmissing_mean(mat::AbstractMatrix) =
     [mean(skipmissing(coldata)) for coldata in eachcol(mat)]
 
 function F_one_person(imp_mean, meandiff, inverse, data, logdet)
diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl
index afcb570bc..6ae53407f 100644
--- a/src/frontend/fit/standard_errors/hessian.jl
+++ b/src/frontend/fit/standard_errors/hessian.jl
@@ -1,58 +1,54 @@
 """
-    se_hessian(semfit::SemFit; hessian = :finitediff)
+    se_hessian(fit::SemFit; method = :finitediff)
 
-Return hessian based standard errors.
+Return hessian-based standard errors.
 
 # Arguments
-- `hessian`: how to compute the hessian. Options are
+- `method`: how to compute the hessian. Options are
     - `:analytic`: (only if an analytic hessian for the model can be computed)
     - `:finitediff`: for finite difference approximation
 """
-function se_hessian(sem_fit::SemFit; hessian = :finitediff)
-    c = H_scaling(sem_fit.model)
-
-    if hessian == :analytic
-        par = solution(sem_fit)
-        H = zeros(eltype(par), length(par), length(par))
-        hessian!(H, sem_fit.model, sem_fit.solution)
-    elseif hessian == :finitediff
-        H = FiniteDiff.finite_difference_hessian(
-            Base.Fix1(objective!, sem_fit.model),
-            sem_fit.solution,
-        )
-    elseif hessian == :optimizer
-        throw(
-            ArgumentError(
-                "standard errors from the optimizer hessian are not implemented yet",
-            ),
-        )
-    elseif hessian == :expected
-        throw(
-            ArgumentError(
-                "standard errors based on the expected hessian are not implemented yet",
-            ),
+function se_hessian(fit::SemFit; method = :finitediff)
+    c = H_scaling(fit.model)
+    params = solution(fit)
+    H = similar(params, (length(params), length(params)))
+
+    if method == :analytic
+        evaluate!(nothing, nothing, H, fit.model, params)
+    elseif method == :finitediff
+        FiniteDiff.finite_difference_hessian!(
+            H,
+            p -> evaluate!(zero(eltype(H)), nothing, nothing, fit.model, p),
+            params,
         )
+    elseif method == :optimizer
+        error("Standard errors from the optimizer hessian are not implemented yet")
+    elseif method == :expected
+        error("Standard errors based on the expected hessian are not implemented yet")
     else
-        throw(ArgumentError("I don't know how to compute `$hessian` standard-errors"))
+        throw(ArgumentError("Unsupported hessian calculation method :$method"))
     end
 
-    invH = c * inv(H)
-    se = sqrt.(diag(invH))
-
-    return se
+    H_chol = cholesky!(Symmetric(H))
+    H_inv = LinearAlgebra.inv!(H_chol)
+    return [sqrt(c * H_inv[i]) for i in diagind(H_inv)]
 end
 
 # Addition functions -------------------------------------------------------------
 
-H_scaling(model::AbstractSemSingle) =
-    H_scaling(model, model.observed, model.imply, model.optimizer, model.loss.functions...)
+function H_scaling(model::AbstractSemSingle)
+    if length(model.loss.functions) > 1
+        @warn "Hessian scaling for multiple loss functions is not implemented yet"
+    end
+    return H_scaling(model.loss.functions[1], model)
+end
 
-H_scaling(model, obs, imp, optimizer, lossfun::SemML) = 2 / (nsamples(model) - 1)
+H_scaling(lossfun::SemML, model::AbstractSemSingle) = 2 / (nsamples(model) - 1)
 
-function H_scaling(model, obs, imp, optimizer, lossfun::SemWLS)
+function H_scaling(lossfun::SemWLS, model::AbstractSemSingle)
     @warn "Standard errors for WLS are only correct if a GLS weight matrix (the default) is used."
     return 2 / (nsamples(model) - 1)
 end
 
-H_scaling(model, obs, imp, optimizer, lossfun::SemFIML) = 2 / nsamples(model)
+H_scaling(lossfun::SemFIML, model::AbstractSemSingle) = 2 / nsamples(model)
 
 H_scaling(model::SemEnsemble) = 2 / nsamples(model)
diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl
index 8970b7430..687b712ba 100644
--- a/src/frontend/specification/ParameterTable.jl
+++ b/src/frontend/specification/ParameterTable.jl
@@ -362,12 +362,12 @@ end
     update_se_hessian!(
         partable::AbstractParameterTable,
         fit::SemFit;
-        hessian = :finitediff)
+        method = :finitediff)
 
 Write hessian standard errors computed for `fit` to the `:se` column of `partable`
 
 # Arguments
-- `hessian::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information.
+- `method::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information.
 
 # Examples
 
@@ -375,9 +375,9 @@ Write hessian standard errors computed for `fit` to the `:se` column of `partabl
 function update_se_hessian!(
     partable::AbstractParameterTable,
     fit::SemFit;
-    hessian = :finitediff,
+    method = :finitediff,
 )
-    se = se_hessian(fit; hessian = hessian)
+    se = se_hessian(fit; method)
     return update_partable!(partable, :se, params(fit), se)
 end
diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl
index 035d9588b..64a33f13e 100644
--- a/src/frontend/specification/StenoGraphs.jl
+++ b/src/frontend/specification/StenoGraphs.jl
@@ -73,12 +73,9 @@ function ParameterTable(
         )
     end
     if element isa ModifiedEdge
-        if any(Base.Fix2(isa, Fixed), values(element.modifiers)) & any(Base.Fix2(isa, Label), values(element.modifiers))
-            throw(
-                ArgumentError(
-                    "It is not allowed to label fixed parameters."
-                )
-            )
+        if any(Base.Fix2(isa, Fixed), values(element.modifiers)) &&
+           any(Base.Fix2(isa, Label), values(element.modifiers))
+            throw(ArgumentError("It is not allowed to label fixed parameters."))
         end
         for modifier in values(element.modifiers)
             if isnothing(group) &&
diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl
index 9ff46bd2e..e7e0b36f5 100644
--- a/src/imply/RAM/generic.jl
+++ b/src/imply/RAM/generic.jl
@@ -65,8 +65,29 @@ Additional interfaces
 
 Only available in gradient! calls:
 - `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}``
 """
-mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S2, S3, B} <:
-               SemImply
+mutable struct RAM{
+    MS,
+    A1,
+    A2,
+    A3,
+    A4,
+    A5,
+    A6,
+    V2,
+    I1,
+    I2,
+    I3,
+    M1,
+    M2,
+    M3,
+    M4,
+    S1,
+    S2,
+    S3,
+} <: SemImply
+    meanstruct::MS
+    hessianeval::ExactHessian
+
     Σ::A1
     A::A2
     S::A3
@@ -75,7 +96,6 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S
     M::A6
 
     ram_matrices::V2
-    has_meanstructure::B
 
     A_indices::I1
     S_indices::I2
@@ -89,9 +109,10 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V2, I1, I2, I3, M1, M2, M3, M4, S1, S
     ∇A::S1
     ∇S::S2
     ∇M::S3
-end
 
-using StructuralEquationModels
+    RAM{MS}(args...) where {MS <: MeanStruct} =
+        new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...)
+end
 
 ############################################################################################
 ### Constructors
 ############################################################################################
 
 function RAM(;
     specification::SemSpecification,
     #vech = false,
-    gradient = true,
+    gradient_required = true,
     meanstructure = false,
     kwargs...,
 )
@@ -133,7 +154,7 @@ function RAM(;
     F⨉I_A⁻¹S = zeros(n_obs, n_var)
     I_A = similar(A_pre)
 
-    if gradient
+    if gradient_required
         ∇A = matrix_gradient(A_indices, n_var^2)
         ∇S = matrix_gradient(S_indices, n_var^2)
     else
@@ -143,23 +164,23 @@ function RAM(;
 
     # μ
     if meanstructure
-        has_meanstructure = Val(true)
+        MS = HasMeanStruct
         !isnothing(M_indices) || throw(
             ArgumentError(
                 "You set `meanstructure = true`, but your model specification contains no mean parameters.",
             ),
         )
-        ∇M = gradient ? matrix_gradient(M_indices, n_var) : nothing
+        ∇M = gradient_required ? matrix_gradient(M_indices, n_var) : nothing
         μ = zeros(n_obs)
     else
-        has_meanstructure = Val(false)
+        MS = NoMeanStruct
         M_indices = nothing
        M_pre = nothing
         μ = nothing
         ∇M = nothing
     end
 
-    return RAM(
+    return RAM{MS}(
         Σ,
         A_pre,
         S_pre,
@@ -167,7 +188,6 @@ function RAM(;
         μ,
         M_pre,
         ram_matrices,
-        has_meanstructure,
         A_indices,
         S_indices,
         M_indices,
@@ -185,14 +205,7 @@ end
 ### methods
 ############################################################################################
 
-# dispatch on meanstructure
-objective!(imply::RAM, par, model::AbstractSemSingle) =
-    objective!(imply, par, model, imply.has_meanstructure)
-gradient!(imply::RAM, par, model::AbstractSemSingle) =
-    gradient!(imply, par, model, imply.has_meanstructure)
-
-# objective and gradient
-function objective!(imply::RAM, params, model, has_meanstructure::Val{T}) where {T}
+function update!(targets::EvaluationTargets, imply::RAM, model::AbstractSemSingle, params)
     fill_A_S_M!(
         imply.A,
         imply.S,
         imply.M,
         imply.A_indices,
         imply.S_indices,
         imply.M_indices,
         params,
     )
 
     @. imply.I_A = -imply.A
     @view(imply.I_A[diagind(imply.I_A)]) .+= 1
 
-    copyto!(imply.F⨉I_A⁻¹, imply.F)
-    rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A))
-
-    Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S)
-
-    if T
-        μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M)
+    if is_gradient_required(targets) || is_hessian_required(targets)
+        imply.I_A⁻¹ = LinearAlgebra.inv!(factorize(imply.I_A))
+        mul!(imply.F⨉I_A⁻¹, imply.F, imply.I_A⁻¹)
+    else
+        copyto!(imply.F⨉I_A⁻¹, imply.F)
+        rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A))
     end
-end
 
-function gradient!(
-    imply::RAM,
-    params,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    fill_A_S_M!(
-        imply.A,
-        imply.S,
-        imply.M,
-        imply.A_indices,
-        imply.S_indices,
-        imply.M_indices,
-        params,
-    )
+    mul!(imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹, imply.S)
+    mul!(imply.Σ, imply.F⨉I_A⁻¹S, imply.F⨉I_A⁻¹')
 
-    @. imply.I_A = -imply.A
-    @view(imply.I_A[diagind(imply.I_A)]) .+= 1
-
-    imply.I_A⁻¹ = LinearAlgebra.inv!(factorize(imply.I_A))
-    mul!(imply.F⨉I_A⁻¹, imply.F, imply.I_A⁻¹)
-
-    Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S)
-
-    if T
-        μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M)
+    if MeanStruct(imply) === HasMeanStruct
+        mul!(imply.μ, imply.F⨉I_A⁻¹, imply.M)
     end
 end
 
-hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) =
-    gradient!(imply, par, model, has_meanstructure)
-objective_gradient!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) =
-    gradient!(imply, par, model, has_meanstructure)
-objective_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) =
-    gradient!(imply, par, model, has_meanstructure)
-gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) =
-    gradient!(imply, par, model, has_meanstructure)
-objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) =
-    gradient!(imply, par, model, has_meanstructure)
-
 ############################################################################################
 ### Recommended methods
 ############################################################################################
@@ -268,48 +247,10 @@ function update_observed(imply::RAM, observed::SemObserved; kwargs...)
     end
 end
 
-############################################################################################
-### additional methods
-############################################################################################
-
-Σ(imply::RAM) = imply.Σ
-μ(imply::RAM) = imply.μ
-
-A(imply::RAM) = imply.A
-S(imply::RAM) = imply.S
-F(imply::RAM) = imply.F
-M(imply::RAM) = imply.M
-
-∇A(imply::RAM) = imply.∇A
-∇S(imply::RAM) = imply.∇S
-∇M(imply::RAM) = imply.∇M
-
-A_indices(imply::RAM) = imply.A_indices
-S_indices(imply::RAM) = imply.S_indices
-M_indices(imply::RAM) = imply.M_indices
-
-F⨉I_A⁻¹(imply::RAM) = imply.F⨉I_A⁻¹
-F⨉I_A⁻¹S(imply::RAM) = imply.F⨉I_A⁻¹S
-I_A(imply::RAM) = imply.I_A
-I_A⁻¹(imply::RAM) = imply.I_A⁻¹ # only for gradient available!
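+# NOTE (migration sketch, assuming `imply` is a constructed `RAM` instance): the
+# accessors removed above are replaced by direct field access plus trait queries, e.g.
+#
+#   imply.Σ                                   # was: Σ(imply)
+#   imply.F⨉I_A⁻¹                             # was: F⨉I_A⁻¹(imply)
+#   if MeanStruct(imply) === HasMeanStruct    # was: dispatch on has_meanstructure(imply)::Val
+#       mul!(imply.μ, imply.F⨉I_A⁻¹, imply.M)
+#   end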
-
-has_meanstructure(imply::RAM) = imply.has_meanstructure
-
-ram_matrices(imply::RAM) = imply.ram_matrices
-
 ############################################################################################
 ### additional functions
 ############################################################################################
 
-function Σ_RAM!(Σ, F⨉I_A⁻¹, S, pre2)
-    mul!(pre2, F⨉I_A⁻¹, S)
-    mul!(Σ, pre2, F⨉I_A⁻¹')
-end
-
-function μ_RAM!(μ, F⨉I_A⁻¹, M)
-    mul!(μ, F⨉I_A⁻¹, M)
-end
-
 function check_acyclic(A_pre, n_par, A_indices)
     # fill copy of A-matrix with random parameters
     A_rand = copy(A_pre)
diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl
index b8da20148..9a96942ae 100644
--- a/src/imply/RAM/symbolic.jl
+++ b/src/imply/RAM/symbolic.jl
@@ -62,8 +62,10 @@ and for models with a meanstructure, the model implied means are computed as
 \mu = F(I-A)^{-1}M
 ```
 """
-struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5, B} <:
+struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <:
        SemImplySymbolic
+    meanstruct::MS
+    hessianeval::ExactHessian
     Σ_function::F1
     ∇Σ_function::F2
     ∇²Σ_function::F3
@@ -78,7 +80,9 @@ struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5, B} <:
     μ::A4
     ∇μ_function::F5
     ∇μ::A5
-    has_meanstructure::B
+
+    RAMSymbolic{MS}(args...) where {MS <: MeanStruct} =
+        new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...)
 end
 
 ############################################################################################
@@ -140,7 +144,7 @@ function RAMSymbolic(;
         ∇Σ = nothing
     end
 
-    if hessian & !approximate_hessian
+    if hessian && !approximate_hessian
         n_sig = length(Σ_symbolic)
         ∇²Σ_symbolic_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_symbolic)]
 
@@ -161,7 +165,7 @@ function RAMSymbolic(;
 
     # μ
     if meanstructure
-        has_meanstructure = Val(true)
+        MS = HasMeanStruct
         μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F)
         μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2]
         μ = zeros(size(μ_symbolic))
@@ -175,14 +179,14 @@ function RAMSymbolic(;
             ∇μ = nothing
         end
     else
-        has_meanstructure = Val(false)
+        MS = NoMeanStruct
         μ_function = nothing
         μ = nothing
         ∇μ_function = nothing
         ∇μ = nothing
     end
 
-    return RAMSymbolic(
+    return RAMSymbolic{MS}(
         Σ_function,
         ∇Σ_function,
         ∇²Σ_function,
@@ -197,7 +201,6 @@ function RAMSymbolic(;
         μ,
         ∇μ_function,
         ∇μ,
-        has_meanstructure,
     )
 end
 
@@ -205,32 +208,25 @@ end
 ### objective, gradient, hessian
 ############################################################################################
 
-# dispatch on meanstructure
-objective!(imply::RAMSymbolic, par, model) =
-    objective!(imply, par, model, imply.has_meanstructure)
-gradient!(imply::RAMSymbolic, par, model) =
-    gradient!(imply, par, model, imply.has_meanstructure)
-
-# objective
-function objective!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where {T}
+function update!(
+    targets::EvaluationTargets,
+    imply::RAMSymbolic,
+    model::AbstractSemSingle,
+    par,
+)
     imply.Σ_function(imply.Σ, par)
-    T && imply.μ_function(imply.μ, par)
-end
+    if MeanStruct(imply) === HasMeanStruct
+        imply.μ_function(imply.μ, par)
+    end
 
-# gradient
-function gradient!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where {T}
-    objective!(imply, par, model, imply.has_meanstructure)
-    imply.∇Σ_function(imply.∇Σ, par)
-    T && imply.∇μ_function(imply.∇μ, par)
+    if is_gradient_required(targets) || is_hessian_required(targets)
+        imply.∇Σ_function(imply.∇Σ, par)
+        if MeanStruct(imply) === HasMeanStruct
+            imply.∇μ_function(imply.∇μ, par)
+        end
+    end
 end
 
-# other methods
-hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model)
-objective_gradient!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model)
-objective_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model)
-gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model)
-objective_gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model)
-
 ############################################################################################
 ### Recommended methods
 ############################################################################################
@@ -243,25 +239,6 @@ function update_observed(imply::RAMSymbolic, observed::SemObserved; kwargs...)
     end
 end
 
-############################################################################################
-### additional methods
-############################################################################################
-
-Σ(imply::RAMSymbolic) = imply.Σ
-∇Σ(imply::RAMSymbolic) = imply.∇Σ
-∇²Σ(imply::RAMSymbolic) = imply.∇²Σ
-
-μ(imply::RAMSymbolic) = imply.μ
-∇μ(imply::RAMSymbolic) = imply.∇μ
-
-Σ_function(imply::RAMSymbolic) = imply.Σ_function
-∇Σ_function(imply::RAMSymbolic) = imply.∇Σ_function
-∇²Σ_function(imply::RAMSymbolic) = imply.∇²Σ_function
-
-has_meanstructure(imply::RAMSymbolic) = imply.has_meanstructure
-
-ram_matrices(imply::RAMSymbolic) = imply.ram_matrices
-
 ############################################################################################
 ### additional functions
 ############################################################################################
diff --git a/src/imply/empty.jl b/src/imply/empty.jl
index f1af2ec42..66373bc1b 100644
--- a/src/imply/empty.jl
+++ b/src/imply/empty.jl
@@ -26,6 +26,8 @@ model per group and an additional model with `ImplyEmpty` and `SemRidge` for the
 Subtype of `SemImply`.
 """
 struct ImplyEmpty{V2} <: SemImply
+    hessianeval::ExactHessian
+    meanstruct::NoMeanStruct
     ram_matrices::V2
 end
 
@@ -34,16 +36,14 @@ end
 ############################################################################################
 
 function ImplyEmpty(; specification, kwargs...)
-    return ImplyEmpty(convert(RAMMatrices, specification))
+    return ImplyEmpty(ExactHessian(), NoMeanStruct(), convert(RAMMatrices, specification))
 end
 
 ############################################################################################
 ### methods
 ############################################################################################
 
-objective!(imply::ImplyEmpty, par, model) = nothing
-gradient!(imply::ImplyEmpty, par, model) = nothing
-hessian!(imply::ImplyEmpty, par, model) = nothing
+update!(targets::EvaluationTargets, imply::ImplyEmpty, model, params) = nothing
 
 ############################################################################################
 ### Recommended methods
diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl
index cd5d0270f..20c81b831 100644
--- a/src/loss/ML/FIML.jl
+++ b/src/loss/ML/FIML.jl
@@ -25,6 +25,7 @@ Analytic gradients are available.
 Subtype of `SemLossFunction`.
 """
 mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction
+    hessianeval::ExactHessian
     inverses::INV #preallocated inverses of imp_cov
     choleskys::C #preallocated choleskys
     logdets::L #logdets of implied covmats
@@ -65,6 +66,7 @@ function SemFIML(; observed, specification, kwargs...)
         [findall(x -> !(x[1] ∈ ind || x[2] ∈ ind), ∇ind) for ind in patterns_not(observed)]
 
     return SemFIML(
+        ExactHessian(),
         inverses,
         choleskys,
         logdets,
@@ -82,43 +84,32 @@ end
 ### methods
 ############################################################################################
 
-function objective!(semfiml::SemFIML, params, model)
-    if !check_fiml(semfiml, model)
-        return non_posdef_return(params)
-    end
-
-    prepare_SemFIML!(semfiml, model)
-
-    objective = F_FIML(pattern_rows(observed(model)), semfiml, model, params)
-    return objective / nsamples(observed(model))
-end
-
-function gradient!(semfiml::SemFIML, params, model)
-    if !check_fiml(semfiml, model)
-        return ones(eltype(params), size(params))
-    end
-
-    prepare_SemFIML!(semfiml, model)
-
-    gradient =
-        ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model))
-    return gradient
-end
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
+    semfiml::SemFIML,
+    implied::SemImply,
+    model::AbstractSemSingle,
+    params,
+)
+    isnothing(hessian) || error("Hessian not implemented for FIML")
 
-function objective_gradient!(semfiml::SemFIML, params, model)
     if !check_fiml(semfiml, model)
-        return non_posdef_return(params), ones(eltype(params), size(params))
+        isnothing(objective) || (objective = non_posdef_return(params))
+        isnothing(gradient) || fill!(gradient, 1)
+        return objective
     end
 
     prepare_SemFIML!(semfiml, model)
 
-    objective =
-        F_FIML(pattern_rows(observed(model)), semfiml, model, params) /
-        nsamples(observed(model))
-    gradient =
-        ∇F_FIML(pattern_rows(observed(model)), semfiml, model) / nsamples(observed(model))
+    scale = inv(nsamples(observed(model)))
+    obs_rows = pattern_rows(observed(model))
+    isnothing(objective) || (objective = scale * F_FIML(obs_rows, semfiml, model, params))
+    isnothing(gradient) ||
+        (∇F_FIML!(gradient, obs_rows, semfiml, model); gradient .*= scale)
 
-    return objective, gradient
+    return objective
 end
 
 ############################################################################################
@@ -133,13 +124,11 @@ update_observed(lossfun::SemFIML, observed::SemObserved; kwargs...) =
 ############################################################################################
 
 function F_one_pattern(meandiff, inverse, obs_cov, logdet, N)
-    F = logdet
-    F += meandiff' * inverse * meandiff
+    F = logdet + dot(meandiff, inverse, meandiff)
     if N > one(N)
         F += dot(obs_cov, inverse)
     end
-    F = N * F
-    return F
+    return F * N
 end
 
 function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, model)
@@ -155,26 +144,23 @@ function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, mod
     end
 end
 
-function ∇F_fiml_outer(JΣ, Jμ, imply::SemImplySymbolic, model, semfiml)
-    G = transpose(JΣ' * ∇Σ(imply) - Jμ' * ∇μ(imply))
-    return G
+function ∇F_fiml_outer!(G, JΣ, Jμ, imply::SemImplySymbolic, model, semfiml)
+    mul!(G, imply.∇Σ', JΣ) # should be transposed
+    mul!(G, imply.∇μ', Jμ, -1, 1)
 end
 
-function ∇F_fiml_outer(JΣ, Jμ, imply, model, semfiml)
-    Iₙ = sparse(1.0I, size(A(imply))...)
-    P = kron(F⨉I_A⁻¹(imply), F⨉I_A⁻¹(imply))
-    Q = kron(S(imply) * I_A⁻¹(imply)', Iₙ)
+function ∇F_fiml_outer!(G, JΣ, Jμ, imply, model, semfiml)
+    Iₙ = sparse(1.0I, size(imply.A)...)
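+    # Chain rule for the RAM-implied covariance Σ = F(I-A)⁻¹ S (I-A)⁻ᵀ F':
+    # vec(∂Σ) = P * (∂S + Q * ∂A), where P = (F(I-A)⁻¹) ⊗ (F(I-A)⁻¹) and
+    # Q = (S(I-A)⁻ᵀ) ⊗ I plus its commutated counterpart (added below via
+    # `semfiml.commutator`), so ∇Σ and ∇μ are assembled from the sparse
+    # RAM derivatives ∇A, ∇S and ∇M.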
+    P = kron(imply.F⨉I_A⁻¹, imply.F⨉I_A⁻¹)
+    Q = kron(imply.S * imply.I_A⁻¹', Iₙ)
     Q .+= semfiml.commutator * Q
 
-    ∇Σ = P * (∇S(imply) + Q * ∇A(imply))
-
-    ∇μ =
-        F⨉I_A⁻¹(imply) * ∇M(imply) +
-        kron((I_A⁻¹(imply) * M(imply))', F⨉I_A⁻¹(imply)) * ∇A(imply)
+    ∇Σ = P * (imply.∇S + Q * imply.∇A)
 
-    G = transpose(JΣ' * ∇Σ - Jμ' * ∇μ)
+    ∇μ = imply.F⨉I_A⁻¹ * imply.∇M + kron((imply.I_A⁻¹ * imply.M)', imply.F⨉I_A⁻¹) * imply.∇A
 
-    return G
+    mul!(G, ∇Σ', JΣ) # actually transposed
+    mul!(G, ∇μ', Jμ, -1, 1)
 end
 
 function F_FIML(rows, semfiml, model, params)
@@ -191,7 +177,7 @@ function F_FIML(rows, semfiml, model, params)
     return F
 end
 
-function ∇F_FIML(rows, semfiml, model)
+function ∇F_FIML!(G, rows, semfiml, model)
     Jμ = zeros(nobserved_vars(model))
     JΣ = zeros(nobserved_vars(model)^2)
 
@@ -208,7 +194,7 @@ function ∇F_FIML(rows, semfiml, model)
             model,
         )
     end
-    return ∇F_fiml_outer(JΣ, Jμ, imply(model), model, semfiml)
+    return ∇F_fiml_outer!(G, JΣ, Jμ, imply(model), model, semfiml)
 end
 
 function prepare_SemFIML!(semfiml, model)
@@ -233,9 +219,9 @@ end
 copy_per_pattern!(semfiml, model::M where {M <: AbstractSem}) = copy_per_pattern!(
     semfiml.inverses,
-    Σ(imply(model)),
+    imply(model).Σ,
     semfiml.imp_mean,
-    μ(imply(model)),
+    imply(model).μ,
     patterns(observed(model)),
 )
 
@@ -248,7 +234,7 @@ function batch_cholesky!(semfiml, model)
 end
 
 function check_fiml(semfiml, model)
-    copyto!(semfiml.imp_inv, Σ(imply(model)))
+    copyto!(semfiml.imp_inv, imply(model).Σ)
     a = cholesky!(Symmetric(semfiml.imp_inv); check = false)
     return isposdef(a)
 end
diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl
index 7811cda7f..e81d27de7 100644
--- a/src/loss/ML/ML.jl
+++ b/src/loss/ML/ML.jl
@@ -27,26 +27,29 @@ Analytic gradients are available, and for models without a meanstructure, also a
 ## Implementation
 Subtype of `SemLossFunction`.
 """
-struct SemML{INV, M, M2, B, V} <: SemLossFunction
+struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction
+    hessianeval::HE
     Σ⁻¹::INV
     Σ⁻¹Σₒ::M
     meandiff::M2
-    approximate_hessian::B
-    has_meanstructure::V
+
+    SemML{HE}(args...) where {HE <: HessianEval} =
+        new{HE, map(typeof, args)...}(HE(), args...)
 end
 
 ############################################################################################
 ### Constructors
 ############################################################################################
 
-function SemML(; observed, meanstructure = false, approximate_hessian = false, kwargs...)
-    isnothing(obs_mean(observed)) ? meandiff = nothing : meandiff = copy(obs_mean(observed))
-    return SemML(
-        similar(obs_cov(observed)),
-        similar(obs_cov(observed)),
+function SemML(; observed::SemObserved, approximate_hessian::Bool = false, kwargs...)
+    obsmean = obs_mean(observed)
+    obscov = obs_cov(observed)
+    meandiff = isnothing(obsmean) ? nothing : copy(obsmean)
+
+    return SemML{approximate_hessian ? ApproxHessian : ExactHessian}(
+        similar(obscov),
+        similar(obscov),
         meandiff,
-        approximate_hessian,
-        Val(meanstructure),
     )
 end
 
@@ -54,511 +57,151 @@ end
 ### objective, gradient, hessian methods
 ############################################################################################
 
-# first, dispatch for meanstructure
-objective!(semml::SemML, par, model::AbstractSemSingle) =
-    objective!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-gradient!(semml::SemML, par, model::AbstractSemSingle) =
-    gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-hessian!(semml::SemML, par, model::AbstractSemSingle) =
-    hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-objective_gradient!(semml::SemML, par, model::AbstractSemSingle) =
-    objective_gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-objective_hessian!(semml::SemML, par, model::AbstractSemSingle) =
-    objective_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) =
-    gradient_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model))
-objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) =
-    objective_gradient_hessian!(
-        semml::SemML,
-        par,
-        model,
-        semml.has_meanstructure,
-        imply(model),
-    )
-
 ############################################################################################
 ### Symbolic Imply Types
 
-function objective!(
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
     semml::SemML,
-    par,
+    implied::SemImplySymbolic,
     model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::SemImplySymbolic,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        μ = μ(imply(model)),
-        μₒ = obs_mean(observed(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) || return non_posdef_return(par)
-        ld = logdet(Σ_chol)
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        #mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
+    par,
+)
+    if !isnothing(hessian)
+        (MeanStruct(implied) === HasMeanStruct) &&
+            error("hessian of ML + meanstructure is not available")
+    end
 
-        if T
-            μ₋ = μₒ - μ
-            return ld + dot(Σ⁻¹, Σₒ) + dot(μ₋, Σ⁻¹, μ₋)
-        else
-            return ld + dot(Σ⁻¹, Σₒ)
-        end
+    Σ = implied.Σ
+    Σₒ = obs_cov(observed(model))
+    Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ
+    Σ⁻¹ = semml.Σ⁻¹
+
+    copyto!(Σ⁻¹, Σ)
+    Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
+    if !isposdef(Σ_chol)
+        #@warn "Σ⁻¹ is not positive definite"
+        isnothing(objective) || (objective = non_posdef_return(par))
+        isnothing(gradient) || fill!(gradient, 1)
+        isnothing(hessian) || copyto!(hessian, I)
+        return objective
     end
-end
+    ld = logdet(Σ_chol)
+    Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
+    mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
+    isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ))
 
-function gradient!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::SemImplySymbolic,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        ∇Σ = ∇Σ(imply(model)),
-        μ = μ(imply(model)),
-        ∇μ = ∇μ(imply(model)),
+    if MeanStruct(implied) === HasMeanStruct
+        μ = implied.μ
         μₒ = obs_mean(observed(model))
+        μ₋ = μₒ - μ
 
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) || return ones(eltype(par), size(par))
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-
-        if T
-            μ₋ = μₒ - μ
+        isnothing(objective) || (objective += dot(μ₋, Σ⁻¹, μ₋))
+        if !isnothing(gradient)
+            ∇Σ = implied.∇Σ
+            ∇μ = implied.∇μ
             μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹
-            gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ
-            return gradient'
-        else
-            gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹)' * ∇Σ
-            return gradient'
+            mul!(gradient, ∇Σ', vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹))
+            mul!(gradient, ∇μ', μ₋ᵀΣ⁻¹', -2, 1)
         end
-    end
-end
-
-function hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{false},
-    imp::SemImplySymbolic,
-)
-    let Σ = Σ(imply(model)),
-        ∇Σ = ∇Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) || return diagm(fill(one(eltype(par)), length(par)))
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-
-        if semml.approximate_hessian
-            hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ
-        else
-            mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-            Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
-            # inner
-            J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)'
-            ∇²Σ_function!(∇²Σ, J, par)
-            # outer
-            H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹)
-            hessian = ∇Σ' * H_outer * ∇Σ
-            hessian .+= ∇²Σ
-        end
-
-        return hessian
-    end
-end
-
-function hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-    imp::SemImplySymbolic,
-)
-    throw(DomainError(H, "hessian of ML + meanstructure is not available"))
-end
-
-function objective_gradient!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::SemImplySymbolic,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        μ = μ(imply(model)),
-        μₒ = obs_mean(observed(model)),
-        ∇Σ = ∇Σ(imply(model)),
-        ∇μ = ∇μ(imply(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        if !isposdef(Σ_chol)
-            return non_posdef_return(par), ones(eltype(par), size(par))
-        else
-            ld = logdet(Σ_chol)
-            Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-            mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-
-            if T
-                μ₋ = μₒ - μ
-                μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹
-
-                objective = ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋)
-                gradient = vec(Σ⁻¹ * (I - Σₒ * Σ⁻¹ - μ₋ * μ₋ᵀΣ⁻¹))' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ
-                return objective, gradient'
-            else
-                objective = ld + tr(Σ⁻¹Σₒ)
-                gradient = (vec(Σ⁻¹) - vec(Σ⁻¹Σₒ * Σ⁻¹))' * ∇Σ
-                return objective, gradient'
-            end
+    elseif !isnothing(gradient) || !isnothing(hessian)
+        ∇Σ = implied.∇Σ
+        Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
+        J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)'
+        if !isnothing(gradient)
+            mul!(gradient, ∇Σ', J')
         end
-    end
-end
-
-function objective_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::SemImplySymbolic,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        ∇Σ = ∇Σ(imply(model)),
-        ∇μ = ∇μ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        if !isposdef(Σ_chol)
-            return non_posdef_return(par), diagm(fill(one(eltype(par)), length(par)))
-        else
-            ld = logdet(Σ_chol)
-            Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-            mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-            objective = ld + tr(Σ⁻¹Σₒ)
-
-            if semml.approximate_hessian
-                hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ
+        if !isnothing(hessian)
+            if HessianEval(semml) === ApproxHessian
+                mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0)
             else
-                Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
+                ∇²Σ_function! = implied.∇²Σ_function
+                ∇²Σ = implied.∇²Σ
                 # inner
-                J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)'
                 ∇²Σ_function!(∇²Σ, J, par)
                 # outer
                 H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹)
-                hessian = ∇Σ' * H_outer * ∇Σ
+                mul!(hessian, ∇Σ' * H_outer, ∇Σ)
                 hessian .+= ∇²Σ
             end
-
-            return objective, hessian
-        end
-    end
-end
-
-function objective_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-    imp::SemImplySymbolic,
-)
-    throw(DomainError(H, "hessian of ML + meanstructure is not available"))
-end
-
-function gradient_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{false},
-    imp::SemImplySymbolic,
-)
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        ∇Σ = ∇Σ(imply(model)),
-        ∇μ = ∇μ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) ||
-            return ones(eltype(par), size(par)), diagm(fill(one(eltype(par)), length(par)))
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-
-        Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
-
-        J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)'
-        gradient = J * ∇Σ
-
-        if semml.approximate_hessian
-            hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ
-        else
-            # inner
-            ∇²Σ_function!(∇²Σ, J, par)
-            # outer
-            H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹)
-            hessian = ∇Σ' * H_outer * ∇Σ
-            hessian .+= ∇²Σ
         end
-
-        return gradient', hessian
-    end
-end
-
-function gradient_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-    imp::SemImplySymbolic,
-)
-    throw(DomainError(H, "hessian of ML + meanstructure is not available"))
-end
-
-function objective_gradient_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{false},
-    imp::SemImplySymbolic,
-)
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        ∇Σ = ∇Σ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        if !isposdef(Σ_chol)
-            objective = non_posdef_return(par)
-            gradient = ones(eltype(par), size(par))
-            hessian = diagm(fill(one(eltype(par)), length(par)))
-            return objective, gradient, hessian
-        end
-        ld = logdet(Σ_chol)
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-        objective = ld + tr(Σ⁻¹Σₒ)
-
-        Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
-
-        J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)'
-        gradient = J * ∇Σ
-
-        if semml.approximate_hessian
-            hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ
-        else
-            Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹
-            # inner
-            ∇²Σ_function!(∇²Σ, J, par)
-            # outer
-            H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹)
-            hessian = ∇Σ' * H_outer * ∇Σ
-            hessian .+= ∇²Σ
-        end
-
-        return objective, gradient', hessian
-    end
-end
-
-function objective_gradient_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-    imp::SemImplySymbolic,
-)
-    throw(DomainError(H, "hessian of ML + meanstructure is not available"))
+    end
+    return objective
 end
 
 ############################################################################################
 ### Non-Symbolic Imply Types
 
-# no hessians ------------------------------------------------------------------------------
-
-function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM)
-    throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available"))
-end
-
-function objective_hessian!(
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
     semml::SemML,
-    par,
+    implied::RAM,
     model::AbstractSemSingle,
-    has_meanstructure,
-    imp::RAM,
-)
-    throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available"))
-end
-
-function gradient_hessian!(
-    semml::SemML,
     par,
-    model::AbstractSemSingle,
-    has_meanstructure,
-    imp::RAM,
 )
-    throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available"))
-end
-
-function objective_gradient_hessian!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure,
-    imp::RAM,
-)
-    throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available"))
-end
-
-# objective, gradient ----------------------------------------------------------------------
+    if !isnothing(hessian)
+        error("hessian of ML + non-symbolic imply type is not available")
+    end
 
-function objective!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::RAM,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        μ = μ(imply(model)),
-        μₒ = obs_mean(observed(model))
+    Σ = implied.Σ
+    Σₒ = obs_cov(observed(model))
+    Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ
+    Σ⁻¹ = semml.Σ⁻¹
+
+    copyto!(Σ⁻¹, Σ)
+    Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
+    if !isposdef(Σ_chol)
+        #@warn "Σ⁻¹ is not positive definite"
+        isnothing(objective) || (objective = non_posdef_return(par))
+        isnothing(gradient) || fill!(gradient, 1)
+        isnothing(hessian) || copyto!(hessian, I)
+        return objective
+    end
+    ld = logdet(Σ_chol)
+    Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
+    mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
 
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) || return non_posdef_return(par)
-        ld = logdet(Σ_chol)
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
+    if !isnothing(objective)
+        objective = ld + tr(Σ⁻¹Σₒ)
 
-        if T
+        if MeanStruct(implied) === HasMeanStruct
+            μ = implied.μ
+            μₒ = obs_mean(observed(model))
             μ₋ = μₒ - μ
-            return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋)
-        else
-            return ld + tr(Σ⁻¹Σₒ)
+            objective += dot(μ₋, Σ⁻¹, μ₋)
         end
     end
-end
-
-function gradient!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::RAM,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        S = S(imply(model)),
-        M = M(imply(model)),
-        F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)),
-        I_A⁻¹ = I_A⁻¹(imply(model)),
-        ∇A = ∇A(imply(model)),
-        ∇S = ∇S(imply(model)),
-        ∇M = ∇M(imply(model)),
-        μ = μ(imply(model)),
-        μₒ = obs_mean(observed(model))
 
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        isposdef(Σ_chol) || return ones(eltype(par), size(par))
-        Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-        mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
+    if !isnothing(gradient)
+        S = implied.S
+        F⨉I_A⁻¹ = implied.F⨉I_A⁻¹
+        I_A⁻¹ = implied.I_A⁻¹
+        ∇A = implied.∇A
+        ∇S = implied.∇S
 
         C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹
-        gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S
-
-        if T
+        mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0)
+        mul!(gradient, ∇S', vec(C), 1, 1)
+
+        if MeanStruct(implied) === HasMeanStruct
+            μ = implied.μ
+            μₒ = obs_mean(observed(model))
+            ∇M = implied.∇M
+            M = implied.M
             μ₋ = μₒ - μ
             μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹
             k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹
-
-            gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S
+            mul!(gradient, ∇M', k', -2, 1)
+            mul!(gradient, ∇A', vec(k' * (I_A⁻¹ * (M + S * k'))'), -2, 1)
+            mul!(gradient, ∇S', vec(k'k), -1, 1)
         end
-
-        return gradient'
     end
-end
 
-function objective_gradient!(
-    semml::SemML,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-    imp::RAM,
-) where {T}
-    let Σ = Σ(imply(model)),
-        Σₒ = obs_cov(observed(model)),
-        Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml),
-        Σ⁻¹ = Σ⁻¹(semml),
-        S = S(imply(model)),
-        M = M(imply(model)),
-        F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)),
-        I_A⁻¹ = I_A⁻¹(imply(model)),
-        ∇A = ∇A(imply(model)),
-        ∇S = ∇S(imply(model)),
-        ∇M = ∇M(imply(model)),
-        μ = μ(imply(model)),
-        μₒ = obs_mean(observed(model))
-
-        copyto!(Σ⁻¹, Σ)
-        Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false)
-        if !isposdef(Σ_chol)
-            objective = non_posdef_return(par)
-            gradient = ones(eltype(par), size(par))
-            return objective, gradient
-        else
-            ld = logdet(Σ_chol)
-            Σ⁻¹ = LinearAlgebra.inv!(Σ_chol)
-            mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ)
-            objective = ld + tr(Σ⁻¹Σₒ)
-
-            C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹
-            gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S
-
-            if T
-                μ₋ = μₒ - μ
-                objective += dot(μ₋, Σ⁻¹, μ₋)
-
-                μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹
-                k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹
-                gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S
-            end
-
-            return objective, gradient'
-        end
-    end
+    return objective
 end
 
 ############################################################################################
@@ -578,7 +221,7 @@ end
 ############################################################################################
 
 update_observed(lossfun::SemML, observed::SemObservedMissing; kwargs...) =
-    throw(ArgumentError("ML estimation does not work with missing data - use FIML instead"))
+    error("ML estimation does not work with missing data - use FIML instead")
 
 function update_observed(lossfun::SemML, observed::SemObserved; kwargs...)
     if size(lossfun.Σ⁻¹) == size(obs_cov(observed))
@@ -587,10 +230,3 @@ function update_observed(lossfun::SemML, observed::SemObserved; kwargs...)
         return SemML(; observed = observed, kwargs...)
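+        # NOTE: rebuilding `SemML` here re-derives the `HessianEval` trait from the
+        # kwargs. A usage sketch (`obs`/`spec`/`data` are hypothetical names):
+        #   obs = SemObservedData(specification = spec, data = data)
+        #   SemML(observed = obs)                              # SemML{ExactHessian, …}
+        #   SemML(observed = obs, approximate_hessian = true)  # SemML{ApproxHessian, …}
+        # `evaluate!` then branches on `HessianEval(semml) === ApproxHessian` to use
+        # the approximation 2 * ∇Σ' * (Σ⁻¹ ⊗ Σ⁻¹) * ∇Σ instead of the exact hessian.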
     end
 end
 
-############################################################################################
-### additional methods
-############################################################################################
-
-Σ⁻¹(semml::SemML) = semml.Σ⁻¹
-Σ⁻¹Σₒ(semml::SemML) = semml.Σ⁻¹Σₒ
diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl
index 8fcc84a99..9702a9cf4 100644
--- a/src/loss/WLS/WLS.jl
+++ b/src/loss/WLS/WLS.jl
@@ -38,18 +38,20 @@ Analytic gradients are available, and for models without a meanstructure, also a
 ## Implementation
 Subtype of `SemLossFunction`.
 """
-struct SemWLS{Vt, St, B, C, B2} <: SemLossFunction
+struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction
+    hessianeval::HE
     V::Vt
     σₒ::St
-    approximate_hessian::B
     V_μ::C
-    has_meanstructure::B2
 end
 
 ############################################################################################
 ### Constructors
 ############################################################################################
 
+SemWLS{HE}(args...) where {HE <: HessianEval} =
+    SemWLS{HE, map(typeof, args)...}(HE(), args...)
+
 function SemWLS(;
     observed,
     wls_weight_matrix = nothing,
@@ -58,269 +60,98 @@ function SemWLS(;
     meanstructure = false,
     kwargs...,
 )
-    ind = CartesianIndices(obs_cov(observed))
-    ind = filter(x -> (x[1] >= x[2]), ind)
-    s = obs_cov(observed)[ind]
+    nobs_vars = nobserved_vars(observed)
+    tril_ind = filter(x -> (x[1] >= x[2]), CartesianIndices(obs_cov(observed)))
+    s = obs_cov(observed)[tril_ind]
 
     # compute V here
     if isnothing(wls_weight_matrix)
-        D = duplication_matrix(nobserved_vars(observed))
+        D = duplication_matrix(nobs_vars)
         S = inv(obs_cov(observed))
         S = kron(S, S)
         wls_weight_matrix = 0.5 * (D' * S * D)
+    else
+        size(wls_weight_matrix) == (length(tril_ind), length(tril_ind)) ||
+            throw(DimensionMismatch(
+                "wls_weight_matrix has to be of size $(length(tril_ind))×$(length(tril_ind))",
+            ))
     end
 
     if meanstructure
         if isnothing(wls_weight_matrix_mean)
             wls_weight_matrix_mean = inv(obs_cov(observed))
+        else
+            size(wls_weight_matrix_mean) == (nobs_vars, nobs_vars) ||
+                throw(DimensionMismatch(
+                    "wls_weight_matrix_mean has to be of size $(nobs_vars)×$(nobs_vars)",
+                ))
         end
     else
+        isnothing(wls_weight_matrix_mean) ||
+            @warn "Ignoring wls_weight_matrix_mean since meanstructure is disabled"
         wls_weight_matrix_mean = nothing
     end
+    HE = approximate_hessian ? ApproxHessian : ExactHessian
 
-    return SemWLS(
-        wls_weight_matrix,
-        s,
-        approximate_hessian,
-        wls_weight_matrix_mean,
-        Val(meanstructure),
-    )
+    return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean)
 end
 
 ############################################################################
 ### methods
 ############################################################################
 
-objective!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    objective!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-gradient!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-hessian!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-
-objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    objective_gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    objective_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-
-objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) =
-    objective_gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure)
-
-function objective!(
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
     semwls::SemWLS,
-    par,
+    implied::SemImplySymbolic,
     model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    let σ = Σ(imply(model)),
-        μ = μ(imply(model)),
-        σₒ = semwls.σₒ,
-        μₒ = obs_mean(observed(model)),
-        V = semwls.V,
-        V_μ = semwls.V_μ,
-
-        σ₋ = σₒ - σ
-
-        if T
-            μ₋ = μₒ - μ
-            return dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋)
-        else
-            return dot(σ₋, V, σ₋)
-        end
-    end
-end
-
-function gradient!(
-    semwls::SemWLS,
     par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    let σ = Σ(imply(model)),
-        μ = μ(imply(model)),
-        σₒ = semwls.σₒ,
-        μₒ = obs_mean(observed(model)),
-        V = semwls.V,
-        V_μ = semwls.V_μ,
-        ∇σ = ∇Σ(imply(model)),
-        ∇μ = ∇μ(imply(model))
-
-        σ₋ = σₒ - σ
-
-        if T
-            μ₋ = μₒ - μ
-            return -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)'
-        else
-            return -2 * (σ₋' * V * ∇σ)'
-        end
+)
+    if !isnothing(hessian) && (MeanStruct(implied) === HasMeanStruct)
+        error("hessian of WLS with meanstructure is not available")
     end
-end
 
-function hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    let σ = Σ(imply(model)),
-        σₒ = semwls.σₒ,
-        V = semwls.V,
-        ∇σ = ∇Σ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
+    V = semwls.V
+    ∇σ = implied.∇Σ
 
-        σ₋ = σₒ - σ
+    σ = implied.Σ
+    σₒ = semwls.σₒ
+    σ₋ = σₒ - σ
 
-        if T
-            throw(DomainError(H, "hessian of WLS with meanstructure is not available"))
-        else
-            hessian = 2 * ∇σ' * V * ∇σ
-            if !semwls.approximate_hessian
-                J = -2 * (σ₋' * semwls.V)'
-                ∇²Σ_function!(∇²Σ, J, par)
-                hessian .+= ∇²Σ
-            end
-            return hessian
+    isnothing(objective) || (objective = dot(σ₋, V, σ₋))
+    if !isnothing(gradient)
+        if issparse(∇σ)
+            gradient .= (σ₋' * V * ∇σ)'
+        else # save one allocation
+            mul!(gradient, σ₋' * V, ∇σ) # actually transposed, but should be fine for vectors
         end
+        gradient .*= -2
     end
-end
-
-function objective_gradient!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    let σ = Σ(imply(model)),
-        μ = μ(imply(model)),
-        σₒ = semwls.σₒ,
-        μₒ = obs_mean(observed(model)),
-        V = semwls.V,
-        V_μ = semwls.V_μ,
-        ∇σ = ∇Σ(imply(model)),
-        ∇μ = ∇μ(imply(model))
-
-        σ₋ = σₒ - σ
-
-        if T
-            μ₋ = μₒ - μ
-            objective = dot(σ₋, V, σ₋) + dot(μ₋', V_μ, μ₋)
-            gradient = -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)'
-            return objective, gradient
-        else
-            objective = dot(σ₋, V, σ₋)
-            gradient = -2 * (σ₋' * V * ∇σ)'
-            return objective, gradient
-        end
+    isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0))
+    if !isnothing(hessian) && (HessianEval(semwls) === ExactHessian)
+        ∇²Σ_function! = implied.∇²Σ_function
+        ∇²Σ = implied.∇²Σ
+        J = -2 * (σ₋' * semwls.V)'
+        ∇²Σ_function!(∇²Σ, J, par)
+        hessian .+= ∇²Σ
     end
-end
-
-function objective_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{T},
-) where {T}
-    let σ = Σ(imply(model)),
-        σₒ = semwls.σₒ,
-        V = semwls.V,
-        ∇σ = ∇Σ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        σ₋ = σₒ - σ
-
-        objective = dot(σ₋, V, σ₋)
-
-        hessian = 2 * ∇σ' * V * ∇σ
-        if !semwls.approximate_hessian
-            J = -2 * (σ₋' * semwls.V)'
-            ∇²Σ_function!(∇²Σ, J, par)
-            hessian .+= ∇²Σ
+    if MeanStruct(implied) === HasMeanStruct
+        μ = implied.μ
+        μₒ = obs_mean(observed(model))
+        μ₋ = μₒ - μ
+        V_μ = semwls.V_μ
+        if !isnothing(objective)
+            objective += dot(μ₋, V_μ, μ₋)
         end
-
-        return objective, hessian
-    end
-end
-
-objective_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-) = throw(DomainError(H, "hessian of WLS with meanstructure is not available"))
-
-function gradient_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{false},
-)
-    let σ = Σ(imply(model)),
-        σₒ = semwls.σₒ,
-        V = semwls.V,
-        ∇σ = ∇Σ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        σ₋ = σₒ - σ
-
-        gradient = -2 * (σ₋' * V * ∇σ)'
-
-        hessian = 2 * ∇σ' * V * ∇σ
-        if !semwls.approximate_hessian
-            J = -2 * (σ₋' * semwls.V)'
-            ∇²Σ_function!(∇²Σ, J, par)
-            hessian .+= ∇²Σ
+        if !isnothing(gradient)
+            mul!(gradient, (V_μ * implied.∇μ)', μ₋, -2, 1)
         end
-
-        return gradient, hessian
     end
-end
-
-gradient_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-) = throw(DomainError(H, "hessian of WLS with meanstructure is not available"))
 
-function objective_gradient_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{false},
-)
-    let σ = Σ(imply(model)),
-        σₒ = semwls.σₒ,
-        V = semwls.V,
-        ∇σ = ∇Σ(imply(model)),
-        ∇²Σ_function! = ∇²Σ_function(imply(model)),
-        ∇²Σ = ∇²Σ(imply(model))
-
-        σ₋ = σₒ - σ
-
-        objective = dot(σ₋, V, σ₋)
-        gradient = -2 * (σ₋' * V * ∇σ)'
-        hessian = 2 * ∇σ' * V * ∇σ
-        if !semwls.approximate_hessian
-            J = -2 * (σ₋' * semwls.V)'
-            ∇²Σ_function!(∇²Σ, J, par)
-            hessian .+= ∇²Σ
-        end
-        return objective, gradient, hessian
-    end
+    return objective
 end
 
-objective_gradient_hessian!(
-    semwls::SemWLS,
-    par,
-    model::AbstractSemSingle,
-    has_meanstructure::Val{true},
-) = throw(DomainError(H, "hessian of WLS with meanstructure is not available"))
-
 ############################################################################################
 ### Recommended methods
 ############################################################################################
diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl
index f3165b541..cb5157346 100644
--- a/src/loss/constant/constant.jl
+++ b/src/loss/constant/constant.jl
@@ -26,6 +26,7 @@ Analytic gradients and hessians are available.
 Subtype of `SemLossFunction`.
 """
 struct SemConstant{C} <: SemLossFunction
+    hessianeval::ExactHessian
     c::C
 end
 
@@ -34,16 +35,17 @@ end
 ############################################################################################
 
 function SemConstant(; constant_loss, kwargs...)
-    return SemConstant(constant_loss)
+    return SemConstant(ExactHessian(), constant_loss)
 end
 
 ############################################################################################
 ### methods
 ############################################################################################
 
-objective!(constant::SemConstant, par, model) = constant.c
-gradient!(constant::SemConstant, par, model) = zero(par)
-hessian!(constant::SemConstant, par, model) = zeros(eltype(par), length(par), length(par))
+objective(constant::SemConstant, model::AbstractSem, par) = constant.c
+gradient(constant::SemConstant, model::AbstractSem, par) = zero(par)
+hessian(constant::SemConstant, model::AbstractSem, par) =
+    zeros(eltype(par), length(par), length(par))
 
 ############################################################################################
 ### Recommended methods
diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl
index a61dd2af0..6ec59ec39 100644
--- a/src/loss/regularization/ridge.jl
+++ b/src/loss/regularization/ridge.jl
@@ -30,6 +30,7 @@ Analytic gradients and hessians are available.
 ## Implementation
 Subtype of `SemLossFunction`.
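+
+## Example
+A minimal sketch (keyword names as in the constructor below; the parameter
+indices and `nparams` value are hypothetical):
+```julia
+ridge = SemRidge(; α_ridge = 0.01, which_ridge = [2, 3], nparams = 10)
+```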
""" struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction + hessianeval::ExactHessian α::P which::W1 which_H::W2 @@ -62,11 +63,11 @@ function SemRidge(; which_ridge = getindex.(Ref(par2ind), which_ridge) end end - which = [CartesianIndex(x) for x in which_ridge] which_H = [CartesianIndex(x, x) for x in which_ridge] return SemRidge( + ExactHessian(), α_ridge, - which, + which_ridge, which_H, zeros(parameter_type, nparams), zeros(parameter_type, nparams, nparams), @@ -77,15 +78,16 @@ end ### methods ############################################################################################ -objective!(ridge::SemRidge, par, model) = @views ridge.α * sum(x -> x^2, par[ridge.which]) +objective(ridge::SemRidge, model::AbstractSem, par) = + @views ridge.α * sum(abs2, par[ridge.which]) -function gradient!(ridge::SemRidge, par, model) - @views ridge.gradient[ridge.which] .= 2 * ridge.α * par[ridge.which] +function gradient(ridge::SemRidge, model::AbstractSem, par) + @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] return ridge.gradient end -function hessian!(ridge::SemRidge, par, model) - @views @. ridge.hessian[ridge.which_H] += ridge.α * 2.0 +function hessian(ridge::SemRidge, model::AbstractSem, par) + @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α return ridge.hessian end diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl index 2debbcd40..f07b572aa 100644 --- a/src/objective_gradient_hessian.jl +++ b/src/objective_gradient_hessian.jl @@ -1,298 +1,150 @@ -############################################################################################ -# methods for AbstractSem -############################################################################################ - -function objective!(model::AbstractSemSingle, params) - objective!(imply(model), params, model) - return objective!(loss(model), params, model) -end - -function gradient!(gradient, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - gradient!(imply(model), params, model) - gradient!(gradient, loss(model), params, model) -end - -function hessian!(hessian, model::AbstractSemSingle, params) - fill!(hessian, zero(eltype(hessian))) - hessian!(imply(model), params, model) - hessian!(hessian, loss(model), params, model) -end - -function objective_gradient!(gradient, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - objective_gradient!(imply(model), params, model) - objective_gradient!(gradient, loss(model), params, model) -end - -function objective_hessian!(hessian, model::AbstractSemSingle, params) - fill!(hessian, zero(eltype(hessian))) - objective_hessian!(imply(model), params, model) - objective_hessian!(hessian, loss(model), params, model) -end - -function gradient_hessian!(gradient, hessian, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - gradient_hessian!(imply(model), params, model) - gradient_hessian!(gradient, hessian, loss(model), params, model) -end - -function objective_gradient_hessian!(gradient, hessian, model::AbstractSemSingle, params) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - objective_gradient_hessian!(imply(model), params, model) - return objective_gradient_hessian!(gradient, hessian, loss(model), params, model) -end +"Specifies whether objective (O), gradient (G) or hessian (H) evaluation is required" +struct EvaluationTargets{O, G, H} end + +EvaluationTargets(objective, gradient, hessian) = + 
EvaluationTargets{!isnothing(objective), !isnothing(gradient), !isnothing(hessian)}() + +# convenience methods to check type params +is_objective_required(::EvaluationTargets{O}) where {O} = O +is_gradient_required(::EvaluationTargets{<:Any, G}) where {G} = G +is_hessian_required(::EvaluationTargets{<:Any, <:Any, H}) where {H} = H + +# return the tuple of the required results +(::EvaluationTargets{true, false, false})(objective, gradient, hessian) = objective +(::EvaluationTargets{false, true, false})(objective, gradient, hessian) = gradient +(::EvaluationTargets{false, false, true})(objective, gradient, hessian) = hessian +(::EvaluationTargets{true, true, false})(objective, gradient, hessian) = + (objective, gradient) +(::EvaluationTargets{true, false, true})(objective, gradient, hessian) = + (objective, hessian) +(::EvaluationTargets{false, true, true})(objective, gradient, hessian) = (gradient, hessian) +(::EvaluationTargets{true, true, true})(objective, gradient, hessian) = + (objective, gradient, hessian) + +(targets::EvaluationTargets)(arg_tuple::Tuple) = targets(arg_tuple...) + +# dispatch on SemImply +evaluate!(objective, gradient, hessian, loss::SemLossFunction, model::AbstractSem, params) = + evaluate!(objective, gradient, hessian, loss, imply(model), model, params) + +# fallback method +function evaluate!(obj, grad, hess, loss::SemLossFunction, imply::SemImply, model, params) + isnothing(obj) || (obj = objective(loss, imply, model, params)) + isnothing(grad) || copyto!(grad, gradient(loss, imply, model, params)) + isnothing(hess) || copyto!(hess, hessian(loss, imply, model, params)) + return obj +end + +# fallback methods +objective(f::SemLossFunction, imply::SemImply, model, params) = objective(f, model, params) +gradient(f::SemLossFunction, imply::SemImply, model, params) = gradient(f, model, params) +hessian(f::SemLossFunction, imply::SemImply, model, params) = hessian(f, model, params) + +# fallback method for SemImply that calls update_xxx!() methods +function update!(targets::EvaluationTargets, imply::SemImply, model, params) + is_objective_required(targets) && update_objective!(imply, model, params) + is_gradient_required(targets) && update_gradient!(imply, model, params) + is_hessian_required(targets) && update_hessian!(imply, model, params) +end + +# guess objective type +objective_type(model::AbstractSem, params::Any) = Float64 +objective_type(model::AbstractSem, params::AbstractVector{T}) where {T <: Number} = T +objective_zero(model::AbstractSem, params::Any) = zero(objective_type(model, params)) + +objective_type(objective::T, gradient, hessian) where {T <: Number} = T +objective_type( + objective::Nothing, + gradient::AbstractArray{T}, + hessian, +) where {T <: Number} = T +objective_type( + objective::Nothing, + gradient::Nothing, + hessian::AbstractArray{T}, +) where {T <: Number} = T +objective_zero(objective, gradient, hessian) = + zero(objective_type(objective, gradient, hessian)) ############################################################################################ -# methods for SemFiniteDiff +# methods for AbstractSem ############################################################################################ -gradient!(gradient, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_gradient!(gradient, x -> objective!(model, x), par) - -hessian!(hessian, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_hessian!(hessian, x -> objective!(model, x), par) - -function objective_gradient!(gradient, model::SemFiniteDiff, params) - 
-    gradient!(gradient, model, params)
-    return objective!(model, params)
-end
-
-# other methods
-function gradient_hessian!(gradient, hessian, model::SemFiniteDiff, params)
-    gradient!(gradient, model, params)
-    hessian!(hessian, model, params)
-end
-
-function objective_hessian!(hessian, model::SemFiniteDiff, params)
-    hessian!(hessian, model, params)
-    return objective!(model, params)
-end
-
-function objective_gradient_hessian!(gradient, hessian, model::SemFiniteDiff, params)
-    hessian!(hessian, model, params)
-    return objective_gradient!(gradient, model, params)
+function evaluate!(objective, gradient, hessian, model::AbstractSemSingle, params)
+    targets = EvaluationTargets(objective, gradient, hessian)
+    # update imply state, its gradient and hessian (if required)
+    update!(targets, imply(model), model, params)
+    return evaluate!(
+        !isnothing(objective) ? zero(objective) : nothing,
+        gradient,
+        hessian,
+        loss(model),
+        model,
+        params,
+    )
 end
 
 ############################################################################################
-# methods for SemLoss
+# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective)
 ############################################################################################
 
-function objective!(loss::SemLoss, par, model)
-    return mapreduce(
-        (fun, weight) -> weight * objective!(fun, par, model),
-        +,
-        loss.functions,
-        loss.weights,
-    )
-end
-
-function gradient!(gradient, loss::SemLoss, par, model)
-    for (lossfun, w) in zip(loss.functions, loss.weights)
-        new_gradient = gradient!(lossfun, par, model)
-        gradient .+= w * new_gradient
-    end
-end
-
-function hessian!(hessian, loss::SemLoss, par, model)
-    for (lossfun, w) in zip(loss.functions, loss.weights)
-        hessian .+= w * hessian!(lossfun, par, model)
-    end
-end
-
-function objective_gradient!(gradient, loss::SemLoss, par, model)
-    return mapreduce(
-        (fun, weight) -> objective_gradient_wrap_(gradient, fun, par, model, weight),
-        +,
-        loss.functions,
-        loss.weights,
-    )
-end
-
-function objective_hessian!(hessian, loss::SemLoss, par, model)
-    return mapreduce(
-        (fun, weight) -> objective_hessian_wrap_(hessian, fun, par, model, weight),
-        +,
-        loss.functions,
-        loss.weights,
-    )
-end
-
-function gradient_hessian!(gradient, hessian, loss::SemLoss, par, model)
-    for (lossfun, w) in zip(loss.functions, loss.weights)
-        new_gradient, new_hessian = gradient_hessian!(lossfun, par, model)
-        gradient .+= w * new_gradient
-        hessian .+= w * new_hessian
+function evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params)
+    function obj(p)
+        # recalculate imply state for p
+        update!(EvaluationTargets{true, false, false}(), imply(model), model, p)
+        evaluate!(
+            objective_zero(objective, gradient, hessian),
+            nothing,
+            nothing,
+            loss(model),
+            model,
+            p,
+        )
     end
+    isnothing(gradient) || FiniteDiff.finite_difference_gradient!(gradient, obj, params)
+    isnothing(hessian) || FiniteDiff.finite_difference_hessian!(hessian, obj, params)
+    return !isnothing(objective) ? obj(params) : nothing
 end
 
-function objective_gradient_hessian!(gradient, hessian, loss::SemLoss, par, model)
-    return mapreduce(
-        (fun, weight) ->
-            objective_gradient_hessian_wrap_(gradient, hessian, fun, par, model, weight),
-        +,
-        loss.functions,
-        loss.weights,
-    )
-end
-
-# wrapper to update gradient/hessian and return objective value
-function objective_gradient_wrap_(gradient, lossfun, par, model, w)
-    new_objective, new_gradient = objective_gradient!(lossfun, par, model)
-    gradient .+= w * new_gradient
-    return w * new_objective
-end
-
-function objective_hessian_wrap_(hessian, lossfun, par, model, w)
-    new_objective, new_hessian = objective_hessian!(lossfun, par, model)
-    hessian .+= w * new_hessian
-    return w * new_objective
-end
-
-function objective_gradient_hessian_wrap_(gradient, hessian, lossfun, par, model, w)
-    new_objective, new_gradient, new_hessian =
-        objective_gradient_hessian!(lossfun, par, model)
-    gradient .+= w * new_gradient
-    hessian .+= w * new_hessian
-    return w * new_objective
-end
+objective(model::AbstractSem, params) =
+    evaluate!(objective_zero(model, params), nothing, nothing, model, params)
 
 ############################################################################################
-# methods for SemEnsemble
+# methods for SemLoss (weighted sum of individual SemLossFunctions)
 ############################################################################################
 
-function objective!(ensemble::SemEnsemble, par)
-    return mapreduce(
-        (model, weight) -> weight * objective!(model, par),
-        +,
-        ensemble.sems,
-        ensemble.weights,
-    )
-end
-
-function gradient!(gradient, ensemble::SemEnsemble, par)
-    fill!(gradient, zero(eltype(gradient)))
-    for (model, w) in zip(ensemble.sems, ensemble.weights)
-        gradient_new = similar(gradient)
-        gradient!(gradient_new, model, par)
-        gradient .+= w * gradient_new
-    end
-end
-
-function hessian!(hessian, ensemble::SemEnsemble, par)
-    fill!(hessian, zero(eltype(hessian)))
-    for (model, w) in zip(ensemble.sems, ensemble.weights)
-        hessian_new = similar(hessian)
-        hessian!(hessian_new, model, par)
-        hessian .+= w * hessian_new
-    end
-end
-
-function objective_gradient!(gradient, ensemble::SemEnsemble, par)
-    fill!(gradient, zero(eltype(gradient)))
-    return mapreduce(
-        (model, weight) -> objective_gradient_wrap_(gradient, model, par, weight),
-        +,
-        ensemble.sems,
-        ensemble.weights,
-    )
-end
-
-function objective_hessian!(hessian, ensemble::SemEnsemble, par)
-    fill!(hessian, zero(eltype(hessian)))
-    return mapreduce(
-        (model, weight) -> objective_hessian_wrap_(hessian, model, par, weight),
-        +,
-        ensemble.sems,
-        ensemble.weights,
-    )
-end
-
-function gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par)
-    fill!(gradient, zero(eltype(gradient)))
-    fill!(hessian, zero(eltype(hessian)))
-    for (model, w) in zip(ensemble.sems, ensemble.weights)
-        new_gradient = similar(gradient)
-        new_hessian = similar(hessian)
-
-        gradient_hessian!(new_gradient, new_hessian, model, par)
-
-        gradient .+= w * new_gradient
-        hessian .+= w * new_hessian
+function evaluate!(objective, gradient, hessian, loss::SemLoss, model::AbstractSem, params)
+    isnothing(objective) || (objective = zero(objective))
+    isnothing(gradient) || fill!(gradient, zero(eltype(gradient)))
+    isnothing(hessian) || fill!(hessian, zero(eltype(hessian)))
+    f_grad = isnothing(gradient) ? nothing : similar(gradient)
+    f_hess = isnothing(hessian) ? nothing : similar(hessian)
+    for (f, weight) in zip(loss.functions, loss.weights)
+        f_obj = evaluate!(objective, f_grad, f_hess, f, model, params)
+        isnothing(objective) || (objective += weight * f_obj)
+        isnothing(gradient) || (gradient .+= weight * f_grad)
+        isnothing(hessian) || (hessian .+= weight * f_hess)
     end
-end
-
-function objective_gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par)
-    fill!(gradient, zero(eltype(gradient)))
-    fill!(hessian, zero(eltype(hessian)))
-    return mapreduce(
-        (model, weight) ->
-            objective_gradient_hessian_wrap_(gradient, hessian, model, par, model, weight),
-        +,
-        ensemble.sems,
-        ensemble.weights,
-    )
-end
-
-# wrapper to update gradient/hessian and return objective value
-function objective_gradient_wrap_(gradient, model::AbstractSemSingle, par, w)
-    gradient_pre = similar(gradient)
-    new_objective = objective_gradient!(gradient_pre, model, par)
-    gradient .+= w * gradient_pre
-    return w * new_objective
-end
-
-function objective_hessian_wrap_(hessian, model::AbstractSemSingle, par, w)
-    hessian_pre = similar(hessian)
-    new_objective = objective_hessian!(hessian_pre, model, par)
-    hessian .+= w * new_hessian
-    return w * new_objective
-end
-
-function objective_gradient_hessian_wrap_(
-    gradient,
-    hessian,
-    model::AbstractSemSingle,
-    par,
-    w,
-)
-    gradient_pre = similar(gradient)
-    hessian_pre = similar(hessian)
-    new_objective = objective_gradient_hessian!(gradient_pre, hessian_pre, model, par)
-    gradient .+= w * new_gradient
-    hessian .+= w * new_hessian
-    return w * new_objective
+    return objective
 end
 
 ############################################################################################
-# generic methods for loss functions
+# methods for SemEnsemble (weighted sum of individual AbstractSemSingle models)
 ############################################################################################
 
-function objective_gradient!(lossfun::SemLossFunction, par, model)
-    objective = objective!(lossfun::SemLossFunction, par, model)
-    gradient = gradient!(lossfun::SemLossFunction, par, model)
-    return objective, gradient
-end
-
-function objective_hessian!(lossfun::SemLossFunction, par, model)
-    objective = objective!(lossfun::SemLossFunction, par, model)
-    hessian = hessian!(lossfun::SemLossFunction, par, model)
-    return objective, hessian
-end
-
-function gradient_hessian!(lossfun::SemLossFunction, par, model)
-    gradient = gradient!(lossfun::SemLossFunction, par, model)
-    hessian = hessian!(lossfun::SemLossFunction, par, model)
-    return gradient, hessian
-end
-
-function objective_gradient_hessian!(lossfun::SemLossFunction, par, model)
-    objective = objective!(lossfun::SemLossFunction, par, model)
-    gradient = gradient!(lossfun::SemLossFunction, par, model)
-    hessian = hessian!(lossfun::SemLossFunction, par, model)
-    return objective, gradient, hessian
+function evaluate!(objective, gradient, hessian, ensemble::SemEnsemble, params)
+    isnothing(objective) || (objective = zero(objective))
+    isnothing(gradient) || fill!(gradient, zero(eltype(gradient)))
+    isnothing(hessian) || fill!(hessian, zero(eltype(hessian)))
+    sem_grad = isnothing(gradient) ? nothing : similar(gradient)
+    sem_hess = isnothing(hessian) ? nothing : similar(hessian)
+    for (sem, weight) in zip(ensemble.sems, ensemble.weights)
+        sem_obj = evaluate!(objective, sem_grad, sem_hess, sem, params)
+        isnothing(objective) || (objective += weight * sem_obj)
+        isnothing(gradient) || (gradient .+= weight * sem_grad)
+        isnothing(hessian) || (hessian .+= weight * sem_hess)
+    end
+    return objective
 end
 
 # throw an error by default if gradient! and hessian! are not implemented
@@ -303,35 +155,6 @@ end
 hessian!(lossfun::SemLossFunction, par, model) =
     throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available"))
 =#
 
-############################################################################################
-# generic methods for imply
-############################################################################################
-
-function objective_gradient!(semimp::SemImply, par, model)
-    objective!(semimp::SemImply, par, model)
-    gradient!(semimp::SemImply, par, model)
-    return nothing
-end
-
-function objective_hessian!(semimp::SemImply, par, model)
-    objective!(semimp::SemImply, par, model)
-    hessian!(semimp::SemImply, par, model)
-    return nothing
-end
-
-function gradient_hessian!(semimp::SemImply, par, model)
-    gradient!(semimp::SemImply, par, model)
-    hessian!(semimp::SemImply, par, model)
-    return nothing
-end
-
-function objective_gradient_hessian!(semimp::SemImply, par, model)
-    objective!(semimp::SemImply, par, model)
-    gradient!(semimp::SemImply, par, model)
-    hessian!(semimp::SemImply, par, model)
-    return nothing
-end
-
 ############################################################################################
 # Documentation
 ############################################################################################
@@ -377,3 +200,18 @@ To implement a new `AbstractSem` subtype, you can add a method for
 hessian!(hessian, model::MyNewType, params)
 """
 function hessian! end
+
+objective!(model::AbstractSem, params) =
+    evaluate!(objective_zero(model, params), nothing, nothing, model, params)
+gradient!(gradient, model::AbstractSem, params) =
+    evaluate!(nothing, gradient, nothing, model, params)
+hessian!(hessian, model::AbstractSem, params) =
+    evaluate!(nothing, nothing, hessian, model, params)
+objective_gradient!(gradient, model::AbstractSem, params) =
+    evaluate!(objective_zero(model, params), gradient, nothing, model, params)
+objective_hessian!(hessian, model::AbstractSem, params) =
+    evaluate!(objective_zero(model, params), nothing, hessian, model, params)
+gradient_hessian!(gradient, hessian, model::AbstractSem, params) =
+    evaluate!(nothing, gradient, hessian, model, params)
+objective_gradient_hessian!(gradient, hessian, model::AbstractSem, params) =
+    evaluate!(objective_zero(model, params), gradient, hessian, model, params)
diff --git a/src/optimizer/NLopt.jl b/src/optimizer/NLopt.jl
index ffe2ffed0..7f4f61e1e 100644
--- a/src/optimizer/NLopt.jl
+++ b/src/optimizer/NLopt.jl
@@ -2,16 +2,6 @@
 ### connect to NLopt.jl as backend
 ############################################################################################
 
-# wrapper to define the objective
-function sem_wrap_nlopt(par, G, model::AbstractSem)
-    need_gradient = length(G) != 0
-    if need_gradient
-        return objective_gradient!(G, model, par)
-    else
-        return objective!(model, par)
-    end
-end
-
 mutable struct NLoptResult
     result::Any
     problem::Any
@@ -34,13 +24,14 @@ end
 
 # sem_fit method
 function sem_fit(
-    model::Sem{O, I, L, D};
+    optimizer::SemOptimizerNLopt,
+    model::AbstractSem;
     start_val = start_val,
     kwargs...,
-) where {O, I, L, D <: SemOptimizerNLopt}
+)
 
     # starting values
-    if !isa(start_val, Vector)
+    if !isa(start_val, AbstractVector)
         start_val = start_val(model; kwargs...)
     end
 
@@ -51,42 +42,14 @@ function sem_fit(
         length(start_val),
     )
     set_NLopt_constraints!(opt, model.optimizer)
-    opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model)
-
-    if !isnothing(model.optimizer.local_algorithm)
-        opt_local = construct_NLopt_problem(
-            model.optimizer.local_algorithm,
-            model.optimizer.local_options,
-            length(start_val),
+    opt.min_objective =
+        (par, G) -> evaluate!(
+            zero(eltype(par)),
+            !isnothing(G) && !isempty(G) ? G : nothing,
+            nothing,
+            model,
+            par,
         )
-        opt.local_optimizer = opt_local
-    end
-
-    # fit
-    result = NLopt.optimize(opt, start_val)
-
-    return SemFit_NLopt(result, model, start_val, opt)
-end
-
-function sem_fit(
-    model::SemEnsemble{N, T, V, D, S};
-    start_val = start_val,
-    kwargs...,
-) where {N, T, V, D <: SemOptimizerNLopt, S}
-
-    # starting values
-    if !isa(start_val, Vector)
-        start_val = start_val(model; kwargs...)
-    end
-
-    # construct the NLopt problem
-    opt = construct_NLopt_problem(
-        model.optimizer.algorithm,
-        model.optimizer.options,
-        length(start_val),
-    )
-    set_NLopt_constraints!(opt, model.optimizer)
-    opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model)
 
     if !isnothing(model.optimizer.local_algorithm)
         opt_local = construct_NLopt_problem(
diff --git a/src/optimizer/documentation.jl b/src/optimizer/documentation.jl
index 83b4f7a98..7c17e6ce2 100644
--- a/src/optimizer/documentation.jl
+++ b/src/optimizer/documentation.jl
@@ -20,3 +20,10 @@ sem_fit(
 ```
 """
 function sem_fit end
+
+# dispatch on optimizer
+sem_fit(model::AbstractSem; kwargs...) = sem_fit(model.optimizer, model; kwargs...)
+
+# fallback method
+sem_fit(optimizer::SemOptimizer, model::AbstractSem; kwargs...) =
= + error("Optimizer $(optimizer) support not implemented.") diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 68617fdb8..bb1bf507e 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -1,30 +1,4 @@ ## connect to Optim.jl as backend -function sem_wrap_optim(par, F, G, H, model::AbstractSem) - if !isnothing(F) - if !isnothing(G) - if !isnothing(H) - return objective_gradient_hessian!(G, H, model, par) - else - return objective_gradient!(G, model, par) - end - else - if !isnothing(H) - return objective_hessian!(H, model, par) - else - return objective!(model, par) - end - end - else - if !isnothing(G) - if !isnothing(H) - gradient_hessian!(G, H, model, par) - else - gradient!(G, model, par) - end - end - end - return nothing -end function SemFit( optimization_result::Optim.MultivariateOptimizationResults, @@ -45,34 +19,17 @@ n_iterations(res::Optim.MultivariateOptimizationResults) = Optim.iterations(res) convergence(res::Optim.MultivariateOptimizationResults) = Optim.converged(res) function sem_fit( - model::AbstractSemSingle{O, I, L, D}; + optim::SemOptimizerOptim, + model::AbstractSem; start_val = start_val, kwargs..., -) where {O, I, L, D <: SemOptimizerOptim} - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), - start_val, - model.optimizer.algorithm, - model.optimizer.options, - ) - return SemFit(result, model, start_val) -end - -function sem_fit( - model::SemEnsemble{N, T, V, D, S}; - start_val = start_val, - kwargs..., -) where {N, T, V, D <: SemOptimizerOptim, S} - if !isa(start_val, Vector) +) + if !isa(start_val, AbstractVector) start_val = start_val(model; kwargs...) end result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), + Optim.only_fgh!((F, G, H, par) -> evaluate!(F, G, H, model, par)), start_val, model.optimizer.algorithm, model.optimizer.options, diff --git a/src/types.jl b/src/types.jl index 0493da8fa..020f6e77d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -10,6 +10,32 @@ abstract type AbstractSemSingle{O, I, L, D} <: AbstractSem end "Supertype for all collections of multiple SEMs" abstract type AbstractSemCollection <: AbstractSem end +"Meanstructure trait for `SemImply` subtypes" +abstract type MeanStruct end +"Indicates that `SemImply` subtype supports mean structure" +struct HasMeanStruct <: MeanStruct end +"Indicates that `SemImply` subtype does not support mean structure" +struct NoMeanStruct <: MeanStruct end + +# default implementation +MeanStruct(::Type{T}) where {T} = + hasfield(T, :meanstruct) ? fieldtype(T, :meanstruct) : + error("Objects of type $T do not support MeanStruct trait") + +MeanStruct(semobj) = MeanStruct(typeof(semobj)) + +"Hessian Evaluation trait for `SemImply` and `SemLossFunction` subtypes" +abstract type HessianEval end +struct ApproxHessian <: HessianEval end +struct ExactHessian <: HessianEval end + +# default implementation +HessianEval(::Type{T}) where {T} = + hasfield(T, :hessianeval) ? fieldtype(T, :hessianeval) : + error("Objects of type $T do not support HessianEval trait") + +HessianEval(semobj) = HessianEval(typeof(semobj)) + "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." 
 abstract type SemLossFunction end
diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl
index 70d2bb914..2e1af38a2 100644
--- a/test/examples/multigroup/build_models.jl
+++ b/test/examples/multigroup/build_models.jl
@@ -70,7 +70,7 @@ end
 
     grad = similar(start_test)
     gradient!(grad, model_ml_multigroup, rand(36))
     grad_fd = FiniteDiff.finite_difference_gradient(
-        x -> objective!(model_ml_multigroup, x),
+        Base.Fix1(SEM.objective, model_ml_multigroup),
         start_test,
     )
 
@@ -114,7 +114,11 @@ end
 # ML estimation - user defined loss function
 ############################################################################################
 
-struct UserSemML <: SemLossFunction end
+struct UserSemML <: SemLossFunction
+    hessianeval::ExactHessian
+
+    UserSemML() = new(ExactHessian())
+end
 
 ############################################################################################
 ### functors
@@ -122,7 +126,7 @@ struct UserSemML <: SemLossFunction end
 
 using LinearAlgebra: isposdef, logdet, tr, inv
 
-function SEM.objective!(semml::UserSemML, params, model::AbstractSem)
+function SEM.objective(ml::UserSemML, model::AbstractSem, params)
     Σ = imply(model).Σ
     Σₒ = SEM.obs_cov(observed(model))
     if !isposdef(Σ)
diff --git a/test/unit_tests/specification.jl b/test/unit_tests/specification.jl
index e307d60f2..ef9fc73a1 100644
--- a/test/unit_tests/specification.jl
+++ b/test/unit_tests/specification.jl
@@ -29,7 +29,7 @@ end
 
 fixed_and_labeled_graph = @StenoGraph begin
     # measurement model
-    visual → fixed(1.0)*label(:λ)*x1
+    visual → fixed(1.0) * label(:λ) * x1
 end
 
 @testset "ParameterTable" begin
@@ -42,7 +42,7 @@ end
     @test_throws ArgumentError("It is not allowed to label fixed parameters.") ParameterTable(
         fixed_and_labeled_graph,
         observed_vars = obs_vars,
-        latent_vars = lat_vars
+        latent_vars = lat_vars,
    )
     partable = @inferred(
         ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars)
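
Usage sketch (illustration only, not part of the patch): the refactor above funnels all objective, gradient, and hessian computation through a single `evaluate!` entry point, where `nothing` arguments switch off the targets that are not needed. A minimal sketch, assuming an already constructed `model` and a parameter vector `params` of matching length (both hypothetical placeholders), and using the `SEM` module-alias convention from the test files since `evaluate!` and `objective!` may not be exported:

using StructuralEquationModels
const SEM = StructuralEquationModels

# `model` and `params` are placeholders: any constructed SEM and a
# parameter vector of matching length
obj = SEM.objective!(model, params)     # objective value only

grad = similar(params)
SEM.gradient!(grad, model, params)      # gradient written into `grad` in-place

# objective and gradient in one pass: non-`nothing` positional arguments
# (objective, gradient, hessian) select which targets are evaluated
obj = SEM.evaluate!(zero(eltype(params)), grad, nothing, model, params)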