2 changes: 2 additions & 0 deletions docs/Project.toml
@@ -1,4 +1,6 @@
[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9"
ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
26 changes: 0 additions & 26 deletions docs/src/assets/concept.svg
26 changes: 0 additions & 26 deletions docs/src/assets/concept_typed.svg
2 changes: 1 addition & 1 deletion docs/src/developer/extending.md
@@ -1,6 +1,6 @@
# Extending the package

As discussed in the section on [Model Construction](@ref), every Structural Equation Model (`Sem`) consists of four parts:
As discussed in the section on [Model Construction](@ref), every Structural Equation Model (`Sem`) consists of three parts (four, if you count the optimizer):

![SEM concept typed](../assets/concept_typed.svg)

95 changes: 53 additions & 42 deletions docs/src/developer/implied.md
@@ -10,78 +10,89 @@ struct MyImplied <: SemImplied
end
```

and at least a method to compute the objective
and a method to `update!`:

```julia
import StructuralEquationModels: objective!

function objective!(implied::MyImplied, par, model::AbstractSemSingle)
    ...
    return nothing
end
```

```julia
function update!(targets::EvaluationTargets, implied::MyImplied, model::AbstractSemSingle, params)

    if is_objective_required(targets)
        ...
    end

    if is_gradient_required(targets)
        ...
    end

    if is_hessian_required(targets)
        ...
    end

end
```

This method should compute and store things you want to make available to the loss functions, and returns `nothing`. For example, as we have seen in [Second example - maximum likelihood](@ref), the `RAM` implied type computes the model-implied covariance matrix and makes it available via `Σ(implied)`.
To make stored computations available to loss functions, simply write a function - for example, for the `RAM` implied type we defined

```julia
Σ(implied::RAM) = implied.Σ
```

Additionally, you can specify methods for `gradient` and `hessian` as well as the combinations described in [Custom loss functions](@ref).
As you can see, `update!` receives `targets` as its first argument, which tells us whether the objective value, the gradient, and/or the hessian are needed.
Using the `is_..._required` functions, we can then compute and store, conditional on what the optimizer needs, the quantities we want to make available to the loss functions. For example, as we have seen in [Second example - maximum likelihood](@ref), the `RAM` implied type computes the model-implied covariance matrix and makes it available via `implied.Σ`.
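
To make this concrete, here is a minimal sketch of a hypothetical implied type that stores a model-implied covariance matrix and fills it in `update!`. It assumes, purely for illustration, a single-factor model with unit factor variance and unit residual variances, so that Σ = λλ' + I; it is not code from the package. Depending on the package version, a real implied type may also need to carry fields such as `hessianeval` and `meanstruct`, as `ImpliedEmpty` below does, and the imported names may need to be adjusted to what the package actually exports:

```julia
using LinearAlgebra
# adjust these imports to whatever your version of the package exports
import StructuralEquationModels: update!, EvaluationTargets,
    is_objective_required, is_gradient_required, is_hessian_required

# hypothetical implied type: stores a pre-allocated model-implied covariance matrix
struct MyImplied <: SemImplied
    Σ::Matrix{Float64}
end

function update!(targets::EvaluationTargets, implied::MyImplied, model::AbstractSemSingle, params)
    # typical loss functions need Σ for the objective, gradient and hessian alike,
    # so we recompute it whenever any of the three is requested
    if is_objective_required(targets) || is_gradient_required(targets) || is_hessian_required(targets)
        λ = params                          # one loading per observed variable (illustrative)
        implied.Σ .= λ * λ' .+ I(length(λ)) # Σ = λλ' + I for this toy model
    end
    return nothing
end
```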

The last thing needed to make it work is a method for `nparams` that takes your implied type and returns the number of parameters of the model:

```julia
nparams(implied::MyImplied) = ...
```
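
For the hypothetical single-factor `MyImplied` sketched above, which has one free loading per observed variable, this could be as simple as (again just a sketch):

```julia
import StructuralEquationModels: nparams

nparams(implied::MyImplied) = size(implied.Σ, 1)
```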

Just as described in [Custom loss functions](@ref), you may define a constructor. Typically, this will depend on the `specification = ...` argument that can be a `ParameterTable` or a `RAMMatrices` object.
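
A hypothetical constructor for `MyImplied` could look like the following sketch. To avoid assuming anything about the internals of the specification objects, the number of observed variables is taken as an explicit keyword here; a real implementation would typically convert `specification` to `RAMMatrices` (as `ImpliedEmpty` below does) and derive the dimensions from it:

```julia
function MyImplied(; specification, n_observed, kwargs...)
    # pre-allocate the model-implied covariance matrix for `n_observed` observed variables
    return MyImplied(zeros(n_observed, n_observed))
end
```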

We implement an `ImpliedEmpty` type in our package that does nothing but serve as an `implied` field in case you are using a loss function that does not need any implied type at all. You may use it as a template for defining your own implied type, as it also shows how to handle the specification objects:

```julia
############################################################################
### Types
############################################################################

struct ImpliedEmpty{V, V2} <: SemImplied
    identifier::V2
    n_par::V
end

############################################################################
### Constructors
############################################################################

function ImpliedEmpty(;
    specification,
    kwargs...)

    ram_matrices = RAMMatrices(specification)
    identifier = StructuralEquationModels.identifier(ram_matrices)

    n_par = length(ram_matrices.parameters)

    return ImpliedEmpty(identifier, n_par)
end

############################################################################
### methods
############################################################################

objective!(implied::ImpliedEmpty, par, model) = nothing
gradient!(implied::ImpliedEmpty, par, model) = nothing
hessian!(implied::ImpliedEmpty, par, model) = nothing

############################################################################
### Recommended methods
############################################################################

identifier(implied::ImpliedEmpty) = implied.identifier
n_par(implied::ImpliedEmpty) = implied.n_par
```

```julia
############################################################################################
### Types
############################################################################################
"""
Empty placeholder for models that don't need an implied part.
(For example, models that only regularize parameters.)

# Constructor

    ImpliedEmpty(;specification, kwargs...)

# Arguments
- `specification`: either a `RAMMatrices` or `ParameterTable` object

# Examples
A multigroup model with ridge regularization could be specified as a `SemEnsemble` with one
model per group and an additional model with `ImpliedEmpty` and `SemRidge` for the regularization part.

# Extended help

## Interfaces
- `params(::ImpliedEmpty)` -> Vector of parameter labels
- `nparams(::ImpliedEmpty)` -> Number of parameters

## Implementation
Subtype of `SemImplied`.
"""
struct ImpliedEmpty{A, B, C} <: SemImplied
    hessianeval::A
    meanstruct::B
    ram_matrices::C
end

############################################################################################
### Constructors
############################################################################################

function ImpliedEmpty(; specification, meanstruct = NoMeanStruct(), hessianeval = ExactHessian(), kwargs...)
    return ImpliedEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification))
end

############################################################################################
### methods
############################################################################################

update!(targets::EvaluationTargets, implied::ImpliedEmpty, par, model) = nothing

############################################################################################
### Recommended methods
############################################################################################

update_observed(implied::ImpliedEmpty, observed::SemObserved; kwargs...) = implied
```

As you see, similar to [Custom loss functions](@ref) we implement a method for `update_observed`. Additionally, you should store the `identifier` from the specification object and write a method for `identifier`, as this will make it possible to access parameter indices by label.
As you see, similar to [Custom loss functions](@ref) we implement a method for `update_observed`.
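
For the hypothetical `MyImplied` from above, which stores nothing that is computed from the data, a sketch of `update_observed` can simply return the implied object unchanged, just like `ImpliedEmpty` does:

```julia
import StructuralEquationModels: update_observed

update_observed(implied::MyImplied, observed::SemObserved; kwargs...) = implied
```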
80 changes: 27 additions & 53 deletions docs/src/developer/loss.md
@@ -20,17 +20,22 @@ end
```
We store the hyperparameter α and the indices I of the parameters we want to regularize.
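
For reference, the struct itself is defined just above this excerpt; a minimal definition consistent with the fields used below would look like this (a sketch, not necessarily the exact code):

```julia
struct Ridge <: SemLossFunction
    α   # penalty strength
    I   # indices of the parameters to regularize
end
```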

Additionally, we need to define a *method* to compute the objective:
Additionally, we need to define a *method* of the function `evaluate!` to compute the objective:

```@example loss
import StructuralEquationModels: objective!
import StructuralEquationModels: evaluate!

objective!(ridge::Ridge, par, model::AbstractSemSingle) = ridge.α*sum(par[ridge.I].^2)
evaluate!(objective::Number, gradient::Nothing, hessian::Nothing, ridge::Ridge, model::AbstractSem, par) =
    ridge.α * sum(i -> par[i]^2, ridge.I)
```

The function `evaluate!` recognizes by the types of the arguments `objective`, `gradient` and `hessian` whether it should compute the objective value, gradient or hessian of the model w.r.t. the parameters.
In this case, `gradient` and `hessian` are of type `Nothing`, signifying that only the objective value should be computed.
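
The same dispatch pattern can be illustrated with a tiny, self-contained example in plain Julia, independent of the package's internals; the argument types decide which quantities one function computes:

```julia
# objective only: no gradient buffer is passed
f(objective::Number, gradient::Nothing, x) = x^2

# gradient requested: fill the pre-allocated buffer in place
function f(objective, gradient::AbstractVector, x)
    gradient[1] = 2x
    return isnothing(objective) ? nothing : x^2
end

f(0.0, nothing, 3.0)   # returns 9.0
g = [0.0]
f(0.0, g, 3.0)         # returns 9.0 and sets g == [6.0]
```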

That's all we need to make it work! For example, we can now fit [A first model](@ref) with ridge regularization:

We first give some parameters labels to be able to identify them as targets for the regularization:

```@example loss
observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8]
latent_vars = [:ind60, :dem60, :dem65]
@@ -65,7 +70,7 @@ partable = ParameterTable(
observed_vars = observed_vars
)

parameter_indices = param_indices([:a, :b, :c], partable)
parameter_indices = getindex.([param_indices(partable)], [:a, :b, :c])
myridge = Ridge(0.01, parameter_indices)

model = SemFiniteDiff(
@@ -86,15 +91,23 @@ Note that the last argument to the `objective!` method is the whole model. There
By far the biggest improvements in performance will result from specifying analytical gradients. We can do this for our example:

```@example loss
import StructuralEquationModels: gradient!

function gradient!(ridge::Ridge, par, model::AbstractSemSingle)
    gradient = zero(par)
    gradient[ridge.I] .= 2*ridge.α*par[ridge.I]
    return gradient
end
```

```@example loss
function evaluate!(objective, gradient, hessian::Nothing, ridge::Ridge, model::AbstractSem, par)
    # compute gradient
    if !isnothing(gradient)
        fill!(gradient, 0)
        gradient[ridge.I] .= 2 * ridge.α * par[ridge.I]
    end
    # compute objective
    if !isnothing(objective)
        return ridge.α * sum(i -> par[i]^2, ridge.I)
    end
end
```

As you can see, in this method definition, both `objective` and `gradient` can be different from `nothing`.
We then check whether to compute the objective value and/or the gradient with `isnothing(objective)`/`isnothing(gradient)`.
This syntax makes it possible to compute the objective value and the gradient at the same time, which is beneficial when the objective and gradient share common computations.
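
As a sketch of why this matters, consider a hypothetical least-squares-style loss (not part of the package) where the residual is needed by both the objective and the gradient and is therefore computed only once per call:

```julia
# hypothetical loss: 0.5 * ‖A*par - b‖², with the residual r shared between targets
struct MyLeastSquares <: SemLossFunction
    A::Matrix{Float64}
    b::Vector{Float64}
end

function evaluate!(objective, gradient, hessian::Nothing, lsq::MyLeastSquares, model::AbstractSem, par)
    r = lsq.A * par - lsq.b                      # shared computation
    isnothing(gradient) || (gradient .= lsq.A' * r)
    return isnothing(objective) ? nothing : 0.5 * sum(abs2, r)
end
```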

Now, instead of specifying a `SemFiniteDiff`, we can use the normal `Sem` constructor:

```@example loss
Expand All @@ -119,46 +132,7 @@ using BenchmarkTools

The exact results of those benchmarks are of course highly dependent on your system (processor, RAM, etc.), but you should see that the median computation time with analytical gradients drops to about 5% of the computation time without analytical gradients.

Additionally, you may provide analytic hessians by writing a method of the form

```julia
function hessian!(ridge::Ridge, par, model::AbstractSemSingle)
...
return hessian
end
```

however, this will only matter if you use an optimization algorithm that makes use of the hessians. Our default algorithm `LBFGS` from the package `Optim.jl` does not use hessians (for example, the `Newton` algorithm from the same package does).

To improve performance even more, you can write a method of the form

```julia
function objective_gradient!(ridge::Ridge, par, model::AbstractSemSingle)
...
return objective, gradient
end
```

This is beneficial when the objective and gradient share common computations. For example, in maximum likelihood estimation, the model implied covariance matrix has to be inverted to compute both the objective and the gradient. Whenever the optimization algorithm asks for the objective value and gradient at the same point, we call `objective_gradient!` and only have to do the shared computations - in this case the matrix inversion - once.

If you want to do hessian-based optimization, there are also the following methods:

```julia
function objective_hessian!(ridge::Ridge, par, model::AbstractSemSingle)
...
return objective, hessian
end

function gradient_hessian!(ridge::Ridge, par, model::AbstractSemSingle)
...
return gradient, hessian
end

function objective_gradient_hessian!(ridge::Ridge, par, model::AbstractSemSingle)
...
return objective, gradient, hessian
end
```
Additionally, you may provide analytic hessians by writing a corresponding method for `evaluate!`. However, this will only matter if you use an optimization algorithm that makes use of the hessians. Our default algorithm `LBFGS` from the package `Optim.jl` does not use hessians (for example, the `Newton` algorithm from the same package does).
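
For the `Ridge` example, such a method might look like the following sketch, assuming the hessian is passed as a pre-allocated matrix in the same way the gradient is passed as a pre-allocated vector:

```julia
function evaluate!(objective, gradient, hessian::AbstractMatrix, ridge::Ridge, model::AbstractSem, par)
    # the ridge penalty has a constant, diagonal hessian: 2α on the regularized indices
    fill!(hessian, 0)
    for i in ridge.I
        hessian[i, i] = 2 * ridge.α
    end
    # gradient, if also requested
    if !isnothing(gradient)
        fill!(gradient, 0)
        gradient[ridge.I] .= 2 * ridge.α * par[ridge.I]
    end
    # objective, if also requested
    if !isnothing(objective)
        return ridge.α * sum(i -> par[i]^2, ridge.I)
    end
end
```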

## Convenient

@@ -241,11 +215,11 @@ With this information, we can implement maximum likelihood optimization as
struct MaximumLikelihood <: SemLossFunction end

using LinearAlgebra
import StructuralEquationModels: Σ, obs_cov, objective!
import StructuralEquationModels: obs_cov, evaluate!

function objective!(semml::MaximumLikelihood, parameters, model::AbstractSem)
function evaluate!(objective::Number, gradient::Nothing, hessian::Nothing, semml::MaximumLikelihood, model::AbstractSem, par)
# access the model implied and observed covariance matrices
Σᵢ = Σ(implied(model))
Σᵢ = implied(model).Σ
Σₒ = obs_cov(observed(model))
# compute the objective
if isposdef(Symmetric(Σᵢ)) # is the model implied covariance matrix positive definite?