From 30e74d7024c50588cf1f7591cee4cddea0eb887c Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 2 Feb 2025 14:38:23 -0800 Subject: [PATCH 01/20] minus2ll(): cleanup method signatures --- src/frontend/fit/fitmeasures/minus2ll.jl | 49 +++++++++--------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index 2cb87d79..a5238a29 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -9,30 +9,31 @@ function minus2ll end # Single Models ############################################################################################ -# SemFit splices loss functions ------------------------------------------------------------ -minus2ll( - sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}, -) = minus2ll( - sem_fit, - sem_fit.model.observed, - sem_fit.model.implied, - sem_fit.model.loss.functions..., -) - -minus2ll(sem_fit::SemFit, obs, imp, args...) = minus2ll(sem_fit.minimum, obs, imp, args...) +minus2ll(fit::SemFit) = minus2ll(fit, fit.model) + +function minus2ll(fit::SemFit, model::AbstractSemSingle) + minimum = objective(model, fit.solution) + return minus2ll(minimum, model) +end + +minus2ll(minimum::Number, model::AbstractSemSingle) = + sum(lossfun -> minus2ll(lossfun, minimum, model), model.loss.functions) # SemML ------------------------------------------------------------------------------------ -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = - nsamples(obs) * (minimum + log(2π) * nobserved_vars(obs)) +function minus2ll(lossfun::SemML, minimum::Number, model::AbstractSemSingle) + obs = observed(model) + return nsamples(obs) * (minimum + log(2π) * nobserved_vars(obs)) +end # WLS -------------------------------------------------------------------------------------- -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = missing +minus2ll(lossfun::SemWLS, minimum::Number, model::AbstractSemSingle) = missing # compute likelihood for missing data - H0 ------------------------------------------------- # -2ll = (∑ log(2π)*(nᵢ + mᵢ)) + F*n -function minus2ll(minimum::Number, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemFIML) - F = minimum * nsamples(observed) - F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns) +function minus2ll(lossfun::SemFIML, minimum::Number, model::AbstractSemSingle) + obs = observed(model)::SemObservedMissing + F = minimum * nsamples(obs) + F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), obs.patterns) return F end @@ -66,16 +67,4 @@ end # Collection ############################################################################################ -minus2ll(minimum, model::AbstractSemSingle) = - minus2ll(minimum, model.observed, model.implied, model.loss.functions...) 
- -function minus2ll( - sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}, -) - m2ll = 0.0 - for sem in sem_fit.model.sems - minimum = objective!(sem, sem_fit.solution) - m2ll += minus2ll(minimum, sem) - end - return m2ll -end +minus2ll(fit::SemFit, model::SemEnsemble) = sum(Base.Fix1(minus2ll, fit), model.sems) From eaf0faed516697381832865877d42db64b8e46c8 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jun 2024 00:13:08 -0700 Subject: [PATCH 02/20] fix chi2 --- src/frontend/fit/fitmeasures/chi2.jl | 97 +++++++++++----------------- 1 file changed, 39 insertions(+), 58 deletions(-) diff --git a/src/frontend/fit/fitmeasures/chi2.jl b/src/frontend/fit/fitmeasures/chi2.jl index 333783f9..dc19467f 100644 --- a/src/frontend/fit/fitmeasures/chi2.jl +++ b/src/frontend/fit/fitmeasures/chi2.jl @@ -1,89 +1,70 @@ """ - χ²(sem_fit::SemFit) + χ²(fit::SemFit) Return the χ² value. """ -function χ² end +χ²(fit::SemFit) = χ²(fit, fit.model) ############################################################################################ # Single Models ############################################################################################ -# SemFit splices loss functions ------------------------------------------------------------ -χ²(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = χ²( - sem_fit, - sem_fit.model.observed, - sem_fit.model.implied, - sem_fit.model.loss.functions..., -) +χ²(fit::SemFit, model::AbstractSemSingle) = + sum(loss -> χ²(loss, fit, model), model.loss.functions) # RAM + SemML -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = - (nsamples(sem_fit) - 1) * - (sem_fit.minimum - logdet(observed.obs_cov) - nobserved_vars(observed)) +χ²(lossfun::SemML, fit::SemFit, model::AbstractSemSingle) = + (nsamples(fit) - 1) * + (fit.minimum - logdet(obs_cov(observed(model))) - nobserved_vars(observed(model))) # bollen, p. 
115, only correct for GLS weight matrix -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = - (nsamples(sem_fit) - 1) * sem_fit.minimum +χ²(lossfun::SemWLS, fit::SemFit, model::AbstractSemSingle) = + (nsamples(fit) - 1) * fit.minimum # FIML -function χ²(sem_fit::SemFit, observed::SemObservedMissing, imp, loss_ml::SemFIML) - ll_H0 = minus2ll(sem_fit) - ll_H1 = minus2ll(observed) - chi2 = ll_H0 - ll_H1 - return chi2 +function χ²(lossfun::SemFIML, fit::SemFit, model::AbstractSemSingle) + ll_H0 = minus2ll(fit) + ll_H1 = minus2ll(observed(model)) + return ll_H0 - ll_H1 end ############################################################################################ # Collections ############################################################################################ -# SemFit splices loss functions ------------------------------------------------------------ -χ²(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - χ²(sem_fit, sem_fit.model, sem_fit.model.sems[1].loss.functions[1]) +function χ²(fit::SemFit, models::SemEnsemble) + isempty(models.sems) && return 0.0 -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemWLS} - check_ensemble_length(model) - check_lossfun_types(model, L) - return (nsamples(model) - 1) * sem_fit.minimum -end + lossfun = models.sems[1].loss.functions[1] + # check that all models use the same single loss function + L = typeof(lossfun) + for (i, sem) in enumerate(models.sems) + if length(sem.loss.functions) > 1 + @error "Model for group #$i has $(length(sem.loss.functions)) loss functions. Only the single one is supported" + end + cur_lossfun = sem.loss.functions[1] + if !isa(cur_lossfun, L) + @error "Loss function for group #$i model is $(typeof(cur_lossfun)), expected $L. Heterogeneous loss functions are not supported" + end + end -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML} - check_ensemble_length(model) - check_lossfun_types(model, L) - F_G = sem_fit.minimum - F_G -= sum([ - w * (logdet(m.observed.obs_cov) + nobserved_vars(m.observed)) for - (w, m) in zip(model.weights, model.sems) - ]) - return (nsamples(model) - 1) * F_G + return χ²(lossfun, fit, models) end -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemFIML} - check_ensemble_length(model) - check_lossfun_types(model, L) - - ll_H0 = minus2ll(sem_fit) - ll_H1 = sum(minus2ll.(observed.(models(model)))) - chi2 = ll_H0 - ll_H1 - - return chi2 +function χ²(lossfun::SemWLS, fit::SemFit, models::SemEnsemble) + return (nsamples(models) - 1) * fit.minimum end -function check_ensemble_length(model) - for sem in model.sems - if length(sem.loss.functions) > 1 - @error "A model for one of the groups contains multiple loss functions." - end +function χ²(lossfun::SemML, fit::SemFit, models::SemEnsemble) + G = sum(zip(models.weights, models.sems)) do (w, model) + data = observed(model) + w * (logdet(obs_cov(data)) + nobserved_vars(data)) end + return (nsamples(models) - 1) * (fit.minimum - G) end -function check_lossfun_types(model, type) - for sem in model.sems - for lossfun in sem.loss.functions - if !isa(lossfun, type) - @error "Your model(s) contain multiple lossfunctions with differing types." 
- end - end - end +function χ²(lossfun::SemFIML, fit::SemFit, models::SemEnsemble) + ll_H0 = minus2ll(fit) + ll_H1 = sum(minus2ll ∘ observed, models.sems) + return ll_H0 - ll_H1 end From d968c54fa305cfe0018c564bdd36f9b024f89396 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 21 Mar 2025 22:30:50 -0700 Subject: [PATCH 03/20] fix RMSEA --- src/frontend/fit/fitmeasures/RMSEA.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/frontend/fit/fitmeasures/RMSEA.jl b/src/frontend/fit/fitmeasures/RMSEA.jl index b9fff648..f9dae84e 100644 --- a/src/frontend/fit/fitmeasures/RMSEA.jl +++ b/src/frontend/fit/fitmeasures/RMSEA.jl @@ -1,15 +1,16 @@ """ - RMSEA(sem_fit::SemFit) + RMSEA(fit::SemFit) Return the RMSEA. """ function RMSEA end -RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = - RMSEA(dof(sem_fit), χ²(sem_fit), nsamples(sem_fit)) +RMSEA(fit::SemFit) = RMSEA(fit, fit.model) -RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - sqrt(length(sem_fit.model.sems)) * RMSEA(dof(sem_fit), χ²(sem_fit), nsamples(sem_fit)) +RMSEA(fit::SemFit, model::AbstractSemSingle) = RMSEA(dof(fit), χ²(fit), nsamples(fit)) + +RMSEA(fit::SemFit, model::SemEnsemble) = + sqrt(length(model.sems)) * RMSEA(dof(fit), χ²(fit), nsamples(fit)) function RMSEA(dof, chi2, nsamples) rmsea = (chi2 - dof) / (nsamples * dof) From 7bb3a7d4a1888235ec370a0a55d0133f23435af9 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 28 May 2024 16:16:04 -0700 Subject: [PATCH 04/20] p_values(): use ccdf() --- src/frontend/fit/fitmeasures/p.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend/fit/fitmeasures/p.jl b/src/frontend/fit/fitmeasures/p.jl index 8c69d5ec..da9bedaf 100644 --- a/src/frontend/fit/fitmeasures/p.jl +++ b/src/frontend/fit/fitmeasures/p.jl @@ -3,4 +3,4 @@ Return the p value computed from the χ² test statistic. 
""" -p_value(sem_fit::SemFit) = 1 - cdf(Chisq(dof(sem_fit)), χ²(sem_fit)) +p_value(sem_fit::SemFit) = ccdf(Chisq(dof(sem_fit)), χ²(sem_fit)) From 39d87c6d770640013d12b41a5d8a9b7600eb8ad9 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 22 Mar 2024 18:34:13 -0700 Subject: [PATCH 05/20] RAM: don't need to copy (I-A) --- src/implied/RAM/generic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl index 301c455e..6a3ceb1c 100644 --- a/src/implied/RAM/generic.jl +++ b/src/implied/RAM/generic.jl @@ -158,7 +158,7 @@ function RAM(; F⨉I_A⁻¹, F⨉I_A⁻¹S, I_A, - copy(I_A), + similar(I_A), ∇A, ∇S, ∇M, From b25a173cbd098536bae5dfa8dcc8edf7b4e37a20 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 10 Apr 2024 15:39:49 -0700 Subject: [PATCH 06/20] EM: move code refs to docstring --- src/observed/EM.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/observed/EM.jl b/src/observed/EM.jl index beac45ca..5fe37b97 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -2,15 +2,8 @@ ### Expectation Maximization Algorithm ############################################################################################ -# An EM Algorithm for MVN-distributed Data with missing values -# Adapted from supplementary Material to the book Machine Learning: A Probabilistic Perspective -# Copyright (2010) Kevin Murphy and Matt Dunham -# found at https://github.com/probml/pmtk3/blob/master/toolbox/BasicModels/gauss/sub/gaussMissingFitEm.m -# and at https://github.com/probml/pmtk3/blob/master/toolbox/Algorithms/optimization/emAlgo.m - # what about random restarts? -# outer function --------------------------------------------------------------------------- """ em_mvn(; observed::SemObservedMissing, @@ -21,6 +14,12 @@ Estimates the covariance matrix and mean vector of the normal distribution via expectation maximization for `observed`. Overwrites the statistics stored in `observed`. + +Uses the EM algorithm for MVN-distributed data with missing values +adapted from the supplementary material to the book *Machine Learning: A Probabilistic Perspective*, +copyright (2010) Kevin Murphy and Matt Dunham: see +[*gaussMissingFitEm.m*](https://github.com/probml/pmtk3/blob/master/toolbox/BasicModels/gauss/sub/gaussMissingFitEm.m) and +[*emAlgo.m*](https://github.com/probml/pmtk3/blob/master/toolbox/Algorithms/optimization/emAlgo.m) scripts. """ function em_mvn( observed::SemObservedMissing; From 301976d698b112b4db5872ced1d011937569035c Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 22 Dec 2024 12:30:08 -0800 Subject: [PATCH 07/20] fix batch_sym_inv_updates() ws --- src/additional_functions/helper.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index 5559034e..3bd7d897 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -62,7 +62,7 @@ end #= function batch_sym_inv_update!(fun::Union{LossFunction, DiffFunction}, model) M_inv = inv(fun.choleskys[1]) - for i = 1:size(fun.inverses, 1) + for i in 1:size(fun.inverses, 1) if size(model.observed.patterns_not[i]) == 0 fun.inverses[i] .= M_inv else From 6254c24ff664c5a86245b40d992f0092b31c8c5b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 24 Dec 2024 11:13:36 -0800 Subject: [PATCH 08/20] RAMSymbolic: rename _func to _eval! 
also remove unused _symbolic fields --- src/implied/RAM/symbolic.jl | 96 +++++++++---------- src/loss/ML/ML.jl | 3 +- src/loss/WLS/WLS.jl | 3 +- .../recover_parameters_twofact.jl | 2 +- 4 files changed, 46 insertions(+), 58 deletions(-) diff --git a/src/implied/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl index eff193c1..311d492f 100644 --- a/src/implied/RAM/symbolic.jl +++ b/src/implied/RAM/symbolic.jl @@ -39,8 +39,8 @@ Jacobians (only available in gradient! calls) - `∇Σ(::RAMSymbolic)` -> ``∂vec(Σ)/∂θᵀ`` - `∇μ(::RAMSymbolic)` -> ``∂μ/∂θᵀ`` -- `∇Σ_function(::RAMSymbolic)` -> function to overwrite `∇Σ` in place, - i.e. `∇Σ_function(∇Σ, θ)`. Normally, you do not want to use this but simply +- `∇Σ_eval!(::RAMSymbolic)` -> function to evaluate `∇Σ` in place, + i.e. `∇Σ_eval!(∇Σ, θ)`. Normally, you do not want to use this but simply query `∇Σ(::RAMSymbolic)`. Hessians @@ -62,23 +62,19 @@ and for models with a meanstructure, the model implied means are computed as \mu = F(I-A)^{-1}M ``` """ -struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <: - SemImpliedSymbolic +struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, V2, F4, A4, F5, A5} <: SemImpliedSymbolic meanstruct::MS hessianeval::ExactHessian - Σ_function::F1 - ∇Σ_function::F2 - ∇²Σ_function::F3 + Σ_eval!::F1 + ∇Σ_eval!::F2 + ∇²Σ_eval!::F3 Σ::A1 ∇Σ::A2 ∇²Σ::A3 - Σ_symbolic::S1 - ∇Σ_symbolic::S2 - ∇²Σ_symbolic::S3 ram_matrices::V2 - μ_function::F4 + μ_eval!::F4 μ::A4 - ∇μ_function::F5 + ∇μ_eval!::F5 ∇μ::A5 RAMSymbolic{MS}(args...) where {MS <: MeanStruct} = @@ -117,81 +113,75 @@ function RAMSymbolic(; I_A⁻¹ = neumann_series(A) # Σ - Σ_symbolic = eval_Σ_symbolic(S, I_A⁻¹, F; vech = vech, simplify = simplify_symbolics) - #print(Symbolics.build_function(Σ_symbolic)[2]) - Σ_function = Symbolics.build_function(Σ_symbolic, par, expression = Val{false})[2] - Σ = zeros(size(Σ_symbolic)) - precompile(Σ_function, (typeof(Σ), Vector{Float64})) + Σ_sym = eval_Σ_symbolic(S, I_A⁻¹, F; vech, simplify = simplify_symbolics) + #print(Symbolics.build_function(Σ_sym)[2]) + Σ_eval! = Symbolics.build_function(Σ_sym, par, expression = Val{false})[2] + Σ = zeros(size(Σ_sym)) + precompile(Σ_eval!, (typeof(Σ), Vector{Float64})) # ∇Σ if gradient - ∇Σ_symbolic = Symbolics.sparsejacobian(vec(Σ_symbolic), [par...]) - ∇Σ_function = Symbolics.build_function(∇Σ_symbolic, par, expression = Val{false})[2] - constr = findnz(∇Σ_symbolic) - ∇Σ = sparse(constr[1], constr[2], fill(1.0, nnz(∇Σ_symbolic)), size(∇Σ_symbolic)...) - precompile(∇Σ_function, (typeof(∇Σ), Vector{Float64})) + ∇Σ_sym = Symbolics.sparsejacobian(vec(Σ_sym), [par...]) + ∇Σ_eval! = Symbolics.build_function(∇Σ_sym, par, expression = Val{false})[2] + constr = findnz(∇Σ_sym) + ∇Σ = sparse(constr[1], constr[2], fill(1.0, nnz(∇Σ_sym)), size(∇Σ_sym)...) + precompile(∇Σ_eval!, (typeof(∇Σ), Vector{Float64})) else - ∇Σ_symbolic = nothing - ∇Σ_function = nothing + ∇Σ_eval! = nothing ∇Σ = nothing end if hessian && !approximate_hessian - n_sig = length(Σ_symbolic) - ∇²Σ_symbolic_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_symbolic)] + n_sig = length(Σ_sym) + ∇²Σ_sym_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_sym)] @variables J[1:n_sig] - ∇²Σ_symbolic = zeros(Num, n_par, n_par) + ∇²Σ_sym = zeros(Num, n_par, n_par) for i in 1:n_sig - ∇²Σ_symbolic += J[i] * ∇²Σ_symbolic_vec[i] + ∇²Σ_sym += J[i] * ∇²Σ_sym_vec[i] end - ∇²Σ_function = - Symbolics.build_function(∇²Σ_symbolic, J, par, expression = Val{false})[2] + ∇²Σ_eval! 
= Symbolics.build_function(∇²Σ_sym, J, par, expression = Val{false})[2] ∇²Σ = zeros(n_par, n_par) else - ∇²Σ_symbolic = nothing - ∇²Σ_function = nothing + ∇²Σ_sym = nothing + ∇²Σ_eval! = nothing ∇²Σ = nothing end # μ if meanstructure MS = HasMeanStruct - μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F; simplify = simplify_symbolics) - μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2] - μ = zeros(size(μ_symbolic)) + μ_sym = eval_μ_symbolic(M, I_A⁻¹, F; simplify = simplify_symbolics) + μ_eval! = Symbolics.build_function(μ_sym, par, expression = Val{false})[2] + μ = zeros(size(μ_sym)) if gradient - ∇μ_symbolic = Symbolics.jacobian(μ_symbolic, [par...]) - ∇μ_function = - Symbolics.build_function(∇μ_symbolic, par, expression = Val{false})[2] + ∇μ_sym = Symbolics.jacobian(μ_sym, [par...]) + ∇μ_eval! = Symbolics.build_function(∇μ_sym, par, expression = Val{false})[2] ∇μ = zeros(size(F, 1), size(par, 1)) else - ∇μ_function = nothing + ∇μ_eval! = nothing ∇μ = nothing end else MS = NoMeanStruct - μ_function = nothing + μ_eval! = nothing μ = nothing - ∇μ_function = nothing + ∇μ_eval! = nothing ∇μ = nothing end return RAMSymbolic{MS}( - Σ_function, - ∇Σ_function, - ∇²Σ_function, + Σ_eval!, + ∇Σ_eval!, + ∇²Σ_eval!, Σ, ∇Σ, ∇²Σ, - Σ_symbolic, - ∇Σ_symbolic, - ∇²Σ_symbolic, ram_matrices, - μ_function, + μ_eval!, μ, - ∇μ_function, + ∇μ_eval!, ∇μ, ) end @@ -206,15 +196,15 @@ function update!( model::AbstractSemSingle, par, ) - implied.Σ_function(implied.Σ, par) + implied.Σ_eval!(implied.Σ, par) if MeanStruct(implied) === HasMeanStruct - implied.μ_function(implied.μ, par) + implied.μ_eval!(implied.μ, par) end if is_gradient_required(targets) || is_hessian_required(targets) - implied.∇Σ_function(implied.∇Σ, par) + implied.∇Σ_eval!(implied.∇Σ, par) if MeanStruct(implied) === HasMeanStruct - implied.∇μ_function(implied.∇μ, par) + implied.∇μ_eval!(implied.∇μ, par) end end end diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index d14af648..679d39d8 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -117,10 +117,9 @@ function evaluate!( if HessianEval(semml) === ApproxHessian mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0) else - ∇²Σ_function! = implied.∇²Σ_function ∇²Σ = implied.∇²Σ # inner - ∇²Σ_function!(∇²Σ, J, par) + implied.∇²Σ_eval!(∇²Σ, J, par) # outer H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) mul!(hessian, ∇Σ' * H_outer, ∇Σ) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 0fe2c9b3..2d1f2d27 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -130,10 +130,9 @@ function evaluate!( end isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0)) if !isnothing(hessian) && (HessianEval(semwls) === ExactHessian) - ∇²Σ_function! 
= implied.∇²Σ_function
         ∇²Σ = implied.∇²Σ
         J = -2 * (σ₋' * semwls.V)'
-        ∇²Σ_function!(∇²Σ, J, par)
+        implied.∇²Σ_eval!(∇²Σ, J, par)
         hessian .+= ∇²Σ
     end
     if MeanStruct(implied) === HasMeanStruct
diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl
index a3e426cb..b0c26838 100644
--- a/test/examples/recover_parameters/recover_parameters_twofact.jl
+++ b/test/examples/recover_parameters/recover_parameters_twofact.jl
@@ -55,7 +55,7 @@ start = [
 
 implied_ml = RAMSymbolic(; specification = ram_matrices, start_val = start)
 
-implied_ml.Σ_function(implied_ml.Σ, true_val)
+implied_ml.Σ_eval!(implied_ml.Σ, true_val)
 
 true_dist = MultivariateNormal(implied_ml.Σ)
 
From 51ae020e784134eff698cb98dcfc59f94ceedbe6 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 21:19:12 -0700
Subject: [PATCH 09/20] md ws fixes

---
 docs/src/tutorials/concept.md                 |  4 +--
 docs/src/tutorials/constraints/constraints.md | 18 ++++++------
 .../tutorials/construction/build_by_parts.md  |  2 +-
 docs/src/tutorials/fitting/fitting.md         | 20 +++++++-------
 docs/src/tutorials/meanstructure.md           |  4 +--
 .../regularization/regularization.md          | 14 +++++-----
 .../specification/graph_interface.md          | 12 ++++-----
 .../specification/parameter_table.md          |  2 +-
 .../tutorials/specification/ram_matrices.md   | 26 +++++++++----------
 9 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/docs/src/tutorials/concept.md b/docs/src/tutorials/concept.md
index 035144d6..2b453925 100644
--- a/docs/src/tutorials/concept.md
+++ b/docs/src/tutorials/concept.md
@@ -50,8 +50,8 @@ Available loss functions are
 - [`SemRidge`](@ref): ridge regularization
 
 ## The optimizer part aka `SemOptimizer`
-The optimizer part of a model connects to the numerical optimization backend used to fit the model. 
-It can be used to control options like the optimization algorithm, linesearch, stopping criteria, etc. 
+The optimizer part of a model connects to the numerical optimization backend used to fit the model.
+It can be used to control options like the optimization algorithm, linesearch, stopping criteria, etc.
 There are currently three available backends, [`SemOptimizerOptim`](@ref) connecting to the [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) backend, [`SemOptimizerNLopt`](@ref) connecting to the [NLopt.jl](https://github.com/JuliaOpt/NLopt.jl) backend and [`SemOptimizerProximal`](@ref) connecting to [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
 For more information about the available options see also the tutorials about [Using Optim.jl](@ref) and [Using NLopt.jl](@ref), as well as [Constrained optimization](@ref) and [Regularization](@ref).
 
diff --git a/docs/src/tutorials/constraints/constraints.md b/docs/src/tutorials/constraints/constraints.md
index 338803cb..2d8b7062 100644
--- a/docs/src/tutorials/constraints/constraints.md
+++ b/docs/src/tutorials/constraints/constraints.md
@@ -38,7 +38,7 @@ end
 
 partable = ParameterTable(
     graph,
-    latent_vars = latent_vars, 
+    latent_vars = latent_vars,
     observed_vars = observed_vars)
 
 data = example_data("political_democracy")
@@ -64,7 +64,7 @@ Let's introduce some constraints:
 
 (Of course those constraints only serve an illustrative purpose.)
 
-We first need to get the indices of the respective parameters that are involved in the constraints. 
We can look up their labels in the output above, and retrieve their indices as
 
 ```@example constraints
@@ -112,7 +112,7 @@ end
 ```
 
 If the algorithm needs gradients at an iteration, it will pass the vector `gradient` that is of the same size as the parameters.
-With `if length(gradient) > 0` we check if the algorithm needs gradients, and if it does, we fill the `gradient` vector with the gradients 
+With `if length(gradient) > 0` we check if the algorithm needs gradients, and if it does, we fill the `gradient` vector with the gradients
 of the constraint w.r.t. the parameters.
 
 In NLopt, vector-valued constraints are also possible, but we refer to the documentation for that.
@@ -134,7 +134,7 @@ constrained_optimizer = SemOptimizerNLopt(
 ```
 
 As you see, the equality constraints and inequality constraints are passed as keyword arguments, and the bounds are passed as options for the (outer) optimization algorithm.
-Additionally, for equality and inequality constraints, a feasibility tolerance can be specified that controls if a solution can be accepted, even if it violates the constraints by a small amount. 
+Additionally, for equality and inequality constraints, a feasibility tolerance can be specified that controls if a solution can be accepted, even if it violates the constraints by a small amount.
 Especially for equality constraints, it is recommended to allow for a small positive tolerance.
 In this example, we set both tolerances to `1e-8`.
 
@@ -142,7 +142,7 @@ In this example, we set both tolerances to `1e-8`.
     We have often observed that the default convergence criteria in NLopt lead to non-convergence flags.
     Indeed, this example does not converge with default criteria.
    As you see above, we used a relatively liberal absolute tolerance in the optimization parameters of 1e-4. 
-    This should not be a problem in most cases, as the sampling variance in (almost all) structural equation models 
+    This should not be a problem in most cases, as the sampling variance in (almost all) structural equation models
     should lead to uncertainty in the parameter estimates that are orders of magnitude larger.
     We nonetheless recommend choosing a convergence criterion with care (i.e. w.r.t. the scale of your parameters),
    inspecting the solutions for plausibility, and comparing them to unconstrained solutions.
@@ -162,14 +162,14 @@ As you can see, the optimizer converged (`:XTOL_REACHED`) and investigating the
 update_partable!(
     partable,
     :estimate_constr,
-    param_labels(model_fit_constrained),
-    solution(model_fit_constrained),
-    )
+    param_labels(model_fit_constrained),
+    solution(model_fit_constrained),
+)
 
 details(partable)
 ```
 
As we can see, the constrained solution is very close to the original solution (compare the columns estimate and estimate_constr), with the difference that the constrained parameters fulfill their constraints.
As all parameters are estimated simultaneously, it is expected that some unconstrained parameters are also affected (e.g., the constraint on `dem60 → y2` leads to a higher estimate of the residual variance `y2 ↔ y2`). 
## Using the Optim.jl backend diff --git a/docs/src/tutorials/construction/build_by_parts.md b/docs/src/tutorials/construction/build_by_parts.md index 45d2a2ea..b5c3c451 100644 --- a/docs/src/tutorials/construction/build_by_parts.md +++ b/docs/src/tutorials/construction/build_by_parts.md @@ -40,7 +40,7 @@ end partable = ParameterTable( graph, - latent_vars = lat_vars, + latent_vars = lat_vars, observed_vars = obs_vars) ``` diff --git a/docs/src/tutorials/fitting/fitting.md b/docs/src/tutorials/fitting/fitting.md index fff06aba..d7353c9f 100644 --- a/docs/src/tutorials/fitting/fitting.md +++ b/docs/src/tutorials/fitting/fitting.md @@ -7,19 +7,19 @@ model_fit = fit(model) # output -Fitted Structural Equation Model -=============================================== ---------------------- Model ------------------- +Fitted Structural Equation Model +=============================================== +--------------------- Model ------------------- -Structural Equation Model -- Loss Functions +Structural Equation Model +- Loss Functions SemML -- Fields - observed: SemObservedData - implied: RAM - optimizer: SemOptimizerOptim +- Fields + observed: SemObservedData + implied: RAM + optimizer: SemOptimizerOptim -------------- Optimization result ------------- +------------- Optimization result ------------- * Status: success diff --git a/docs/src/tutorials/meanstructure.md b/docs/src/tutorials/meanstructure.md index b2da5029..4e6d2a36 100644 --- a/docs/src/tutorials/meanstructure.md +++ b/docs/src/tutorials/meanstructure.md @@ -40,7 +40,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` @@ -78,7 +78,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md index 3d82fcfb..a22601b9 100644 --- a/docs/src/tutorials/regularization/regularization.md +++ b/docs/src/tutorials/regularization/regularization.md @@ -2,7 +2,7 @@ ## Setup -For ridge regularization, you can simply use `SemRidge` as an additional loss function +For ridge regularization, you can simply use `SemRidge` as an additional loss function (for example, a model with the loss functions `SemML` and `SemRidge` corresponds to ridge-regularized maximum likelihood estimation). For lasso, elastic net and (far) beyond, you can load the `ProximalAlgorithms.jl` and `ProximalOperators.jl` packages alongside `StructuralEquationModels`: @@ -22,7 +22,7 @@ using StructuralEquationModels, ProximalAlgorithms, ProximalOperators ## `SemOptimizerProximal` To estimate regularized models, we provide a "building block" for the optimizer part, called `SemOptimizerProximal`. -It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms. +It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms. Those can handle, amongst other things, various forms of regularization. 
It can be used as

```julia
SemOptimizerProximal(
    algorithm = ProximalAlgorithms.PANOC(),
    options = Dict{Symbol, Any}(),
    operator_g,
    operator_h = nothing
-    )
+)
```

The proximal operator (aka the regularization function) can be passed as `operator_g`, available options are listed [here](https://juliafirstorder.github.io/ProximalOperators.jl/stable/functions/).
@@ -70,7 +70,7 @@ end
 
 partable = ParameterTable(
     graph,
-    latent_vars = latent_vars, 
+    latent_vars = latent_vars,
     observed_vars = observed_vars
 )
 
@@ -86,7 +86,7 @@ We labeled the covariances between the items because we want to regularize those
 
 ```@example reg
 ind = getindex.(
-    [param_indices(model)],
+    Ref(param_indices(model)),
     [:cov_15, :cov_24, :cov_26, :cov_37, :cov_48, :cov_68])
 ```
 
@@ -108,7 +108,7 @@ and use `SemOptimizerProximal`.
 
 ```@example reg
 optimizer_lasso = SemOptimizerProximal(
     operator_g = NormL1(λ)
-    )
+)
 
 model_lasso = Sem(
     specification = partable,
@@ -159,7 +159,7 @@ prox_operator = SlicedSeparableSum((NormL0(20.0), NormL1(0.02), NormL0(0.0)), ([
 
 model_mixed = Sem(
     specification = partable,
-    data = data, 
+    data = data,
 )
 
 fit_mixed = fit(model_mixed; engine = :Proximal, operator_g = prox_operator)
diff --git a/docs/src/tutorials/specification/graph_interface.md b/docs/src/tutorials/specification/graph_interface.md
index 75e1d1b6..62eeef00 100644
--- a/docs/src/tutorials/specification/graph_interface.md
+++ b/docs/src/tutorials/specification/graph_interface.md
@@ -1,6 +1,6 @@
 # Graph interface
 
-## Workflow 
+## Workflow
 As discussed before, when using the graph interface, you can specify your model as a graph
 
 ```julia
@@ -17,7 +17,7 @@ lat_vars = ...
 
 partable = ParameterTable(
     graph,
-    latent_vars = lat_vars, 
+    latent_vars = lat_vars,
     observed_vars = obs_vars)
 
 model = Sem(
@@ -32,7 +32,7 @@ In general, there are two different types of parameters: **directed** and **indi
 We allow multiple variables on both sides of an arrow, for example `x → [y z]` or `[a b] → [c d]`. The latter specifies element-wise edges; that is, it is the same as `a → c; b → d`. If you want edges corresponding to the cross-product, we have the double-lined arrow `[a b] ⇒ [c d]`, corresponding to `a → c; a → d; b → c; b → d`. The undirected arrows ↔ (element-wise) and ⇔ (crossproduct) behave the same way.
 
 !!! note "Unicode symbols in julia"
     The `→` symbol is a unicode symbol allowed in julia (among many others; see this [list](https://docs.julialang.org/en/v1/manual/unicode-input/)). You can enter it in the julia REPL or the vscode IDE by typing `\to` followed by hitting `tab`. 
Similarly, - `←` = `\leftarrow`, - `↔` = `\leftrightarrow`, - `⇒` = `\Rightarrow`, @@ -54,7 +54,7 @@ graph = @StenoGraph begin ξ₃ ↔ fixed(1.0)*ξ₃ end ``` -would +would - fix the directed effects from `ξ₁` to `x1` and from `ξ₂` to `x2` to `1` - leave the directed effect from `ξ₃` to `x7` free but instead restrict the variance of `ξ₃` to `1` - give the effect from `ξ₁` to `x3` the label `:a` (which can be convenient later if you want to retrieve information from your model about that specific parameter) @@ -66,7 +66,7 @@ As you saw above and in the [A first model](@ref) example, the graph object need ```julia partable = ParameterTable( graph, - latent_vars = lat_vars, + latent_vars = lat_vars, observed_vars = obs_vars) ``` @@ -85,7 +85,7 @@ The variable names (`:x1`) have to be symbols, the syntax `:something` creates a _(lat_vars) ⇔ _(lat_vars) end ``` -creates undirected effects coresponding to +creates undirected effects coresponding to 1. the variances of all observed variables and 2. the variances plus covariances of all latent variables So if you want to work with a subset of variables, simply specify a vector of symbols `somevars = [...]`, and inside the graph specification, refer to them as `_(somevars)`. diff --git a/docs/src/tutorials/specification/parameter_table.md b/docs/src/tutorials/specification/parameter_table.md index c328a3b1..62c45c9a 100644 --- a/docs/src/tutorials/specification/parameter_table.md +++ b/docs/src/tutorials/specification/parameter_table.md @@ -5,5 +5,5 @@ As lavaan also uses parameter tables to store model specifications, we are worki ## Convert from and to RAMMatrices -To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(rammatrices)`. +To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(rammatrices)`. To convert an object of type `ParameterTable` to RAMMatrices, you can use `ram_matrices = RAMMatrices(partable)`. \ No newline at end of file diff --git a/docs/src/tutorials/specification/ram_matrices.md b/docs/src/tutorials/specification/ram_matrices.md index abe76ea6..2730ff4b 100644 --- a/docs/src/tutorials/specification/ram_matrices.md +++ b/docs/src/tutorials/specification/ram_matrices.md @@ -1,6 +1,6 @@ # RAMMatrices interface -Models can also be specified by an object of type `RAMMatrices`. +Models can also be specified by an object of type `RAMMatrices`. The RAM (reticular action model) specification corresponds to three matrices; the `A` matrix containing all directed parameters, the `S` matrix containing all undirected parameters, and the `F` matrix filtering out latent variables from the model implied covariance. The model implied covariance matrix for the observed variables of a SEM is then computed as @@ -56,9 +56,9 @@ A =[0 0 0 0 0 0 0 0 0 0 0 1.0 0 0 θ = Symbol.(:θ, 1:31) spec = RAMMatrices(; - A = A, - S = S, - F = F, + A = A, + S = S, + F = F, param_labels = θ, vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) @@ -71,9 +71,9 @@ model = Sem( Let's look at this step by step: -First, we specify the `A`, `S` and `F`-Matrices. -For a free parameter, we write a `Symbol` like `:θ1` (or any other symbol we like) to the corresponding place in the respective matrix, to constrain parameters to be equal we just use the same `Symbol` in the respective entries. -To fix a parameter (as in the `A`-Matrix above), we just write down the number we want to fix it to. +First, we specify the `A`, `S` and `F`-Matrices. 
+For a free parameter, we write a `Symbol` like `:θ1` (or any other symbol we like) to the corresponding place in the respective matrix, to constrain parameters to be equal we just use the same `Symbol` in the respective entries. +To fix a parameter (as in the `A`-Matrix above), we just write down the number we want to fix it to. All other entries are 0. Second, we specify a vector of symbols containing our parameters: @@ -82,14 +82,14 @@ Second, we specify a vector of symbols containing our parameters: θ = Symbol.(:θ, 1:31) ``` -Third, we construct an object of type `RAMMatrices`, passing our matrices and parameters, as well as the column names of our matrices. +Third, we construct an object of type `RAMMatrices`, passing our matrices and parameters, as well as the column names of our matrices. Those are quite important, as they will be used to rearrange your data to match it to your `RAMMatrices` specification. ```julia spec = RAMMatrices(; - A = A, - S = S, - F = F, + A = A, + S = S, + F = F, param_labels = θ, vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) @@ -109,7 +109,7 @@ According to the RAM, model implied mean values of the observed variables are co ```math \mu = F(I-A)^{-1}M ``` -where `M` is a vector of mean parameters. To estimate the means of the observed variables in our example (and set the latent means to `0`), we would specify the model just as before but add +where `M` is a vector of mean parameters. To estimate the means of the observed variables in our example (and set the latent means to `0`), we would specify the model just as before but add ```julia ... @@ -128,5 +128,5 @@ spec = RAMMatrices(; ## Convert from and to ParameterTables -To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(ram_matrices)`. +To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(ram_matrices)`. To convert an object of type `ParameterTable` to RAMMatrices, you can use `ram_matrices = RAMMatrices(partable)`. 
\ No newline at end of file From ae1adc723cf57f3980814061c992b444c9941aa5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 24 Dec 2024 11:07:05 -0800 Subject: [PATCH 10/20] test/Proximal: move usings to the central file --- test/examples/proximal/l0.jl | 2 -- test/examples/proximal/lasso.jl | 2 -- test/examples/proximal/proximal.jl | 2 ++ test/examples/proximal/ridge.jl | 2 -- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/test/examples/proximal/l0.jl b/test/examples/proximal/l0.jl index 374f8e58..2c0040fe 100644 --- a/test/examples/proximal/l0.jl +++ b/test/examples/proximal/l0.jl @@ -1,5 +1,3 @@ -using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators - # load data dat = example_data("political_democracy") diff --git a/test/examples/proximal/lasso.jl b/test/examples/proximal/lasso.jl index beb5cf52..356ac618 100644 --- a/test/examples/proximal/lasso.jl +++ b/test/examples/proximal/lasso.jl @@ -1,5 +1,3 @@ -using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators - # load data dat = example_data("political_democracy") diff --git a/test/examples/proximal/proximal.jl b/test/examples/proximal/proximal.jl index 40e72a1e..84a9162c 100644 --- a/test/examples/proximal/proximal.jl +++ b/test/examples/proximal/proximal.jl @@ -1,3 +1,5 @@ +using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators, Suppressor + @testset "Ridge" begin include("ridge.jl") end diff --git a/test/examples/proximal/ridge.jl b/test/examples/proximal/ridge.jl index fd7ae113..61b7fa12 100644 --- a/test/examples/proximal/ridge.jl +++ b/test/examples/proximal/ridge.jl @@ -1,5 +1,3 @@ -using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators, Suppressor - # load data dat = example_data("political_democracy") From c06e874da9d340a9f11d9f9bce402d38a22ff6c3 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 24 Dec 2024 12:06:12 -0800 Subject: [PATCH 11/20] tests: move usings in the top file --- test/examples/political_democracy/constructor.jl | 3 --- test/examples/political_democracy/political_democracy.jl | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl index 7a8adc72..4045141c 100644 --- a/test/examples/political_democracy/constructor.jl +++ b/test/examples/political_democracy/constructor.jl @@ -1,6 +1,3 @@ -using Statistics: cov, mean -using Random, NLopt - ############################################################################################ ### models w.o. meanstructure ############################################################################################ diff --git a/test/examples/political_democracy/political_democracy.jl b/test/examples/political_democracy/political_democracy.jl index ad06e0fc..cbdf7ea7 100644 --- a/test/examples/political_democracy/political_democracy.jl +++ b/test/examples/political_democracy/political_democracy.jl @@ -1,4 +1,6 @@ using StructuralEquationModels, Test, Suppressor, FiniteDiff +using Statistics: cov, mean +using Random, NLopt SEM = StructuralEquationModels From 4b597a91bbb2c2ab4fe713eed73190cf7a52ebf5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 21 Mar 2025 21:00:35 -0700 Subject: [PATCH 12/20] Revert "fix Proximal extension" This reverts commit 9729819b86f375e4663de1fe9ec9c38d4932f580. 
--- ext/SEMProximalOptExt/SEMProximalOptExt.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/SEMProximalOptExt/SEMProximalOptExt.jl b/ext/SEMProximalOptExt/SEMProximalOptExt.jl index 192944fe..43e8aa5a 100644 --- a/ext/SEMProximalOptExt/SEMProximalOptExt.jl +++ b/ext/SEMProximalOptExt/SEMProximalOptExt.jl @@ -2,7 +2,9 @@ module SEMProximalOptExt using StructuralEquationModels using ProximalAlgorithms -using StructuralEquationModels: SemOptimizerProximal, print_type_name, print_field_types +using StructuralEquationModels: print_type_name, print_field_types + +export SemOptimizerProximal SEM = StructuralEquationModels From e743018dacce3c131266ec9ab4cd5d9cfa7427ff Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 21 Mar 2025 21:01:59 -0700 Subject: [PATCH 13/20] Revert "fix NLopt extension" This reverts commit 81a4bd9839df01e9f487b9aa13e3df107856114a. --- ext/SEMNLOptExt/NLopt.jl | 5 +++++ ext/SEMNLOptExt/SEMNLOptExt.jl | 3 ++- src/StructuralEquationModels.jl | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl index c5e0ad6c..238cc560 100644 --- a/ext/SEMNLOptExt/NLopt.jl +++ b/ext/SEMNLOptExt/NLopt.jl @@ -1,3 +1,8 @@ +Base.@kwdef struct NLoptConstraint + f::Any + tol = 0.0 +end + Base.convert( ::Type{NLoptConstraint}, tuple::NamedTuple{(:f, :tol), Tuple{F, T}}, diff --git a/ext/SEMNLOptExt/SEMNLOptExt.jl b/ext/SEMNLOptExt/SEMNLOptExt.jl index c79fc2b8..a159f6dc 100644 --- a/ext/SEMNLOptExt/SEMNLOptExt.jl +++ b/ext/SEMNLOptExt/SEMNLOptExt.jl @@ -1,10 +1,11 @@ module SEMNLOptExt using StructuralEquationModels, NLopt -using StructuralEquationModels: SemOptimizerNLopt, NLoptConstraint SEM = StructuralEquationModels +export SemOptimizerNLopt, NLoptConstraint + include("NLopt.jl") end diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index f6068dc5..cfa94062 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -198,6 +198,5 @@ export AbstractSem, ↔, ⇔, SemOptimizerNLopt, - NLoptConstraint, SemOptimizerProximal end From c4234d6b56bb85866bc28f994c837892bcb43728 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 21 Mar 2025 21:03:35 -0700 Subject: [PATCH 14/20] Revert "fix exporting structs from package extensions" This reverts commit f0df6538f0220f964cbf51772698c317a0b4cf86. --- ext/SEMNLOptExt/NLopt.jl | 67 ++++++++++++++++++++ ext/SEMProximalOptExt/ProximalAlgorithms.jl | 25 ++++++++ src/StructuralEquationModels.jl | 7 +-- src/package_extensions/SEMNLOptExt.jl | 69 --------------------- src/package_extensions/SEMProximalOptExt.jl | 21 ------- 5 files changed, 93 insertions(+), 96 deletions(-) delete mode 100644 src/package_extensions/SEMNLOptExt.jl delete mode 100644 src/package_extensions/SEMProximalOptExt.jl diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl index 238cc560..32a928d2 100644 --- a/ext/SEMNLOptExt/NLopt.jl +++ b/ext/SEMNLOptExt/NLopt.jl @@ -1,3 +1,70 @@ +############################################################################################ +### Types +############################################################################################ +""" +Connects to `NLopt.jl` as the optimization backend. + +# Constructor + + SemOptimizerNLopt(; + algorithm = :LD_LBFGS, + options = Dict{Symbol, Any}(), + local_algorithm = nothing, + local_options = Dict{Symbol, Any}(), + equality_constraints = Vector{NLoptConstraint}(), + inequality_constraints = Vector{NLoptConstraint}(), + kwargs...) 
+ +# Arguments +- `algorithm`: optimization algorithm. +- `options::Dict{Symbol, Any}`: options for the optimization algorithm +- `local_algorithm`: local optimization algorithm +- `local_options::Dict{Symbol, Any}`: options for the local optimization algorithm +- `equality_constraints::Vector{NLoptConstraint}`: vector of equality constraints +- `inequality_constraints::Vector{NLoptConstraint}`: vector of inequality constraints + +# Example +```julia +my_optimizer = SemOptimizerNLopt() + +# constrained optimization with augmented lagrangian +my_constrained_optimizer = SemOptimizerNLopt(; + algorithm = :AUGLAG, + local_algorithm = :LD_LBFGS, + local_options = Dict(:ftol_rel => 1e-6), + inequality_constraints = NLoptConstraint(;f = my_constraint, tol = 0.0), +) +``` + +# Usage +All algorithms and options from the NLopt library are available, for more information see +the NLopt.jl package and the NLopt online documentation. +For information on how to use inequality and equality constraints, +see [Constrained optimization](@ref) in our online documentation. + +# Extended help + +## Interfaces +- `algorithm(::SemOptimizerNLopt)` +- `local_algorithm(::SemOptimizerNLopt)` +- `options(::SemOptimizerNLopt)` +- `local_options(::SemOptimizerNLopt)` +- `equality_constraints(::SemOptimizerNLopt)` +- `inequality_constraints(::SemOptimizerNLopt)` + +## Implementation + +Subtype of `SemOptimizer`. +""" +struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer{:NLopt} + algorithm::A + local_algorithm::A2 + options::B + local_options::B2 + equality_constraints::C + inequality_constraints::C +end + Base.@kwdef struct NLoptConstraint f::Any tol = 0.0 diff --git a/ext/SEMProximalOptExt/ProximalAlgorithms.jl b/ext/SEMProximalOptExt/ProximalAlgorithms.jl index 0d4748e3..438665b5 100644 --- a/ext/SEMProximalOptExt/ProximalAlgorithms.jl +++ b/ext/SEMProximalOptExt/ProximalAlgorithms.jl @@ -1,3 +1,28 @@ +############################################################################################ +### Types +############################################################################################ +""" +Connects to `ProximalAlgorithms.jl` as the optimization backend. + +# Constructor + + SemOptimizerProximal(; + algorithm = ProximalAlgorithms.PANOC(), + operator_g, + operator_h = nothing, + kwargs..., + +# Arguments +- `algorithm`: optimization algorithm. +- `operator_g`: gradient of the objective function +- `operator_h`: optional hessian of the objective function +""" +mutable struct SemOptimizerProximal{A, B, C} <: SemOptimizer{:Proximal} + algorithm::A + operator_g::B + operator_h::C +end + SEM.SemOptimizer{:Proximal}(args...; kwargs...) = SemOptimizerProximal(args...; kwargs...) 
SemOptimizerProximal(; diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index cfa94062..84456595 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -86,9 +86,6 @@ include("frontend/fit/fitmeasures/fit_measures.jl") # standard errors include("frontend/fit/standard_errors/hessian.jl") include("frontend/fit/standard_errors/bootstrap.jl") -# extensions -include("package_extensions/SEMNLOptExt.jl") -include("package_extensions/SEMProximalOptExt.jl") export AbstractSem, AbstractSemSingle, @@ -196,7 +193,5 @@ export AbstractSem, →, ←, ↔, - ⇔, - SemOptimizerNLopt, - SemOptimizerProximal + ⇔ end diff --git a/src/package_extensions/SEMNLOptExt.jl b/src/package_extensions/SEMNLOptExt.jl deleted file mode 100644 index 69721ac9..00000000 --- a/src/package_extensions/SEMNLOptExt.jl +++ /dev/null @@ -1,69 +0,0 @@ -""" -Connects to `NLopt.jl` as the optimization backend. -Only usable if `NLopt.jl` is loaded in the current Julia session! - -# Constructor - - SemOptimizerNLopt(; - algorithm = :LD_LBFGS, - options = Dict{Symbol, Any}(), - local_algorithm = nothing, - local_options = Dict{Symbol, Any}(), - equality_constraints = Vector{NLoptConstraint}(), - inequality_constraints = Vector{NLoptConstraint}(), - kwargs...) - -# Arguments -- `algorithm`: optimization algorithm. -- `options::Dict{Symbol, Any}`: options for the optimization algorithm -- `local_algorithm`: local optimization algorithm -- `local_options::Dict{Symbol, Any}`: options for the local optimization algorithm -- `equality_constraints::Vector{NLoptConstraint}`: vector of equality constraints -- `inequality_constraints::Vector{NLoptConstraint}`: vector of inequality constraints - -# Example -```julia -my_optimizer = SemOptimizerNLopt() - -# constrained optimization with augmented lagrangian -my_constrained_optimizer = SemOptimizerNLopt(; - algorithm = :AUGLAG, - local_algorithm = :LD_LBFGS, - local_options = Dict(:ftol_rel => 1e-6), - inequality_constraints = NLoptConstraint(;f = my_constraint, tol = 0.0), -) -``` - -# Usage -All algorithms and options from the NLopt library are available, for more information see -the NLopt.jl package and the NLopt online documentation. -For information on how to use inequality and equality constraints, -see [Constrained optimization](@ref) in our online documentation. - -# Extended help - -## Interfaces -- `algorithm(::SemOptimizerNLopt)` -- `local_algorithm(::SemOptimizerNLopt)` -- `options(::SemOptimizerNLopt)` -- `local_options(::SemOptimizerNLopt)` -- `equality_constraints(::SemOptimizerNLopt)` -- `inequality_constraints(::SemOptimizerNLopt)` - -## Implementation - -Subtype of `SemOptimizer`. -""" -struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer{:NLopt} - algorithm::A - local_algorithm::A2 - options::B - local_options::B2 - equality_constraints::C - inequality_constraints::C -end - -Base.@kwdef struct NLoptConstraint - f::Any - tol = 0.0 -end diff --git a/src/package_extensions/SEMProximalOptExt.jl b/src/package_extensions/SEMProximalOptExt.jl deleted file mode 100644 index 5d400750..00000000 --- a/src/package_extensions/SEMProximalOptExt.jl +++ /dev/null @@ -1,21 +0,0 @@ -""" -Connects to `ProximalAlgorithms.jl` as the optimization backend. - -# Constructor - - SemOptimizerProximal(; - algorithm = ProximalAlgorithms.PANOC(), - operator_g, - operator_h = nothing, - kwargs..., - -# Arguments -- `algorithm`: optimization algorithm. 
-- `operator_g`: gradient of the objective function
-- `operator_h`: optional hessian of the objective function
-"""
-mutable struct SemOptimizerProximal{A, B, C} <: SemOptimizer{:Proximal}
-    algorithm::A
-    operator_g::B
-    operator_h::C
-end

From c3c5a82bbad2f8277ca0e04211e2539a87baaded Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 21:18:02 -0700
Subject: [PATCH 15/20] types.jl: move SemOptimizer API into abstract.jl

---
 src/optimizer/abstract.jl | 17 +++++++++++++++++
 src/types.jl              | 11 -----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/optimizer/abstract.jl b/src/optimizer/abstract.jl
index 2487b7c5..6d27f784 100644
--- a/src/optimizer/abstract.jl
+++ b/src/optimizer/abstract.jl
@@ -1,3 +1,20 @@
+engine(::Type{SemOptimizer{E}}) where {E} = E
+engine(optimizer::SemOptimizer) = engine(typeof(optimizer))
+
+SemOptimizer(args...; engine::Symbol = :Optim, kwargs...) =
+    SemOptimizer{engine}(args...; kwargs...)
+
+# fallback optimizer constructor
+function SemOptimizer{E}(args...; kwargs...) where {E}
+    if E == :NLopt
+        error("$E optimizer requires \"using NLopt\".")
+    elseif E == :Proximal
+        error("$E optimizer requires \"using ProximalAlgorithms\".")
+    else
+        error("$E optimizer is not supported.")
+    end
+end
+
 """
     fit([optim::SemOptimizer], model::AbstractSem;
         [engine::Symbol], start_val = start_val, kwargs...)
diff --git a/src/types.jl b/src/types.jl
index 64a4acba..81355808 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -86,17 +86,6 @@ If you want to connect the SEM package to a new optimization backend, you should
 """
 abstract type SemOptimizer{E} end
 
-engine(::Type{SemOptimizer{E}}) where {E} = E
-engine(optimizer::SemOptimizer) = engine(typeof(optimizer))
-
-SemOptimizer(args...; engine::Symbol = :Optim, kwargs...) =
-    SemOptimizer{engine}(args...; kwargs...)
-
-# fallback optimizer constructor
-function SemOptimizer{E}(args...; kwargs...) where {E}
-    throw(ErrorException("$E optimizer is not supported."))
-end
-
 """
 Supertype of all objects that can serve as the observed field of a SEM.
 Pre-processes data and computes sufficient statistics for example.

From 85fa10b5a4673d14e081ee037f3eb21802030aca Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 23:25:57 -0700
Subject: [PATCH 16/20] NLoptResult should not be mutable

---
 ext/SEMNLOptExt/NLopt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl
index 32a928d2..7471dcf4 100644
--- a/ext/SEMNLOptExt/NLopt.jl
+++ b/ext/SEMNLOptExt/NLopt.jl
@@ -123,7 +123,7 @@ local_options(optimizer::SemOptimizerNLopt) = optimizer.local_options
 equality_constraints(optimizer::SemOptimizerNLopt) = optimizer.equality_constraints
 inequality_constraints(optimizer::SemOptimizerNLopt) = optimizer.inequality_constraints
 
-mutable struct NLoptResult
+struct NLoptResult
     result::Any
     problem::Any
 end

From 28ff7c47bfbee7aab1b83699f8cdd95c248e5922 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 23:32:15 -0700
Subject: [PATCH 17/20] SemNLOpt: use f or f => tol pair for constraints

It is a simple and intuitive syntax and avoids declaring new types.

Also allow specifying default constraint tolerance as `constraint_tol`. 
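
For illustration, a minimal sketch of the new syntax (here `my_constraint` is
a placeholder for any callable that takes the parameter vector and an array
for the in-place gradient):

    my_optimizer = SemOptimizer(
        engine = :NLopt,
        algorithm = :AUGLAG,
        local_algorithm = :LD_LBFGS,
        # a plain callable uses the default `constraint_tol`;
        # an `f => tol` pair overrides the tolerance for that constraint
        inequality_constraints = [my_constraint => 1e-8],
    )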
---
 docs/src/tutorials/constraints/constraints.md |   6 +-
 ext/SEMNLOptExt/NLopt.jl                      | 108 +++++++++---------
 ext/SEMNLOptExt/SEMNLOptExt.jl                |   2 +-
 .../political_democracy/constraints.jl        |   4 +-
 4 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/docs/src/tutorials/constraints/constraints.md b/docs/src/tutorials/constraints/constraints.md
index 2d8b7062..930910c6 100644
--- a/docs/src/tutorials/constraints/constraints.md
+++ b/docs/src/tutorials/constraints/constraints.md
@@ -1,6 +1,6 @@
 # Constrained optimization
 
-## Using the NLopt backend
+## Using the NLopt engine
 
 ### Define an example model
@@ -128,8 +128,8 @@ constrained_optimizer = SemOptimizerNLopt(
     algorithm = :AUGLAG,
     options = Dict(:upper_bounds => upper_bounds, :xtol_abs => 1e-4),
     local_algorithm = :LD_LBFGS,
-    equality_constraints = NLoptConstraint(;f = eq_constraint, tol = 1e-8),
-    inequality_constraints = NLoptConstraint(;f = ineq_constraint, tol = 1e-8),
+    equality_constraints = (eq_constraint => 1e-8),
+    inequality_constraints = (ineq_constraint => 1e-8),
 )
 ```
diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl
index 7471dcf4..49277b8c 100644
--- a/ext/SEMNLOptExt/NLopt.jl
+++ b/ext/SEMNLOptExt/NLopt.jl
@@ -1,6 +1,9 @@
 ############################################################################################
 ### Types
 ############################################################################################
+
+const NLoptConstraint = Pair{Any, Number}
+
 """
 Connects to `NLopt.jl` as the optimization backend.
 
@@ -11,8 +14,9 @@ Connects to `NLopt.jl` as the optimization backend.
         options = Dict{Symbol, Any}(),
         local_algorithm = nothing,
         local_options = Dict{Symbol, Any}(),
-        equality_constraints = Vector{NLoptConstraint}(),
-        inequality_constraints = Vector{NLoptConstraint}(),
+        equality_constraints = nothing,
+        inequality_constraints = nothing,
+        constraint_tol::Number = 0.0,
         kwargs...)
 
 # Arguments
 - `algorithm`: optimization algorithm.
 - `options::Dict{Symbol, Any}`: options for the optimization algorithm
 - `local_algorithm`: local optimization algorithm
 - `local_options::Dict{Symbol, Any}`: options for the local optimization algorithm
-- `equality_constraints::Vector{NLoptConstraint}`: vector of equality constraints
-- `inequality_constraints::Vector{NLoptConstraint}`: vector of inequality constraints
+- `equality_constraints`: optional equality constraints
+- `inequality_constraints`: optional inequality constraints
+- `constraint_tol::Number`: default tolerance for constraints

## Constraints specification

The equality and inequality constraints arguments can be a single constraint or any
iterable container of constraints (e.g. a vector or tuple).
Each constraint can be a function or any other callable object that
takes two input arguments:
 - the vector of the model parameters;
 - the array for the in-place calculation of the constraint gradient.
To override the default tolerance, a constraint can be specified
as a pair of the function and its tolerance: `constraint_func => tol`. 
# Example ```julia -my_optimizer = SemOptimizerNLopt() +my_optimizer = SemOptimizer(engine = :NLopt) # constrained optimization with augmented lagrangian -my_constrained_optimizer = SemOptimizerNLopt(; +my_constrained_optimizer = SemOptimizer(; + engine = :NLopt, algorithm = :AUGLAG, local_algorithm = :LD_LBFGS, local_options = Dict(:ftol_rel => 1e-6), - inequality_constraints = NLoptConstraint(;f = my_constraint, tol = 0.0), + inequality_constraints = (my_constraint => tol), ) ``` @@ -56,25 +73,15 @@ see [Constrained optimization](@ref) in our online documentation. Subtype of `SemOptimizer`. """ -struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer{:NLopt} - algorithm::A - local_algorithm::A2 - options::B - local_options::B2 - equality_constraints::C - inequality_constraints::C +struct SemOptimizerNLopt <: SemOptimizer{:NLopt} + algorithm::Symbol + local_algorithm::Union{Symbol, Nothing} + options::Dict{Symbol, Any} + local_options::Dict{Symbol, Any} + equality_constraints::Vector{NLoptConstraint} + inequality_constraints::Vector{NLoptConstraint} end -Base.@kwdef struct NLoptConstraint - f::Any - tol = 0.0 -end - -Base.convert( - ::Type{NLoptConstraint}, - tuple::NamedTuple{(:f, :tol), Tuple{F, T}}, -) where {F, T} = NLoptConstraint(tuple.f, tuple.tol) - ############################################################################################ ### Constructor ############################################################################################ @@ -84,22 +91,26 @@ function SemOptimizerNLopt(; local_algorithm = nothing, options = Dict{Symbol, Any}(), local_options = Dict{Symbol, Any}(), - equality_constraints = Vector{NLoptConstraint}(), - inequality_constraints = Vector{NLoptConstraint}(), - kwargs..., + equality_constraints = nothing, + inequality_constraints = nothing, + constraint_tol::Number = 0.0, + kwargs..., # FIXME remove the sink for unused kwargs ) - applicable(iterate, equality_constraints) && !isa(equality_constraints, NamedTuple) || - (equality_constraints = [equality_constraints]) - applicable(iterate, inequality_constraints) && - !isa(inequality_constraints, NamedTuple) || - (inequality_constraints = [inequality_constraints]) + constraint(f::Any) = f => constraint_tol + constraint(f_and_tol::Pair) = f_and_tol + + constraints(::Nothing) = Vector{NLoptConstraint}() + constraints(constraints) = + applicable(iterate, constraints) && !isa(constraints, Pair) ? 
+ [constraint(constr) for constr in constraints] : [constraint(constraints)] + return SemOptimizerNLopt( algorithm, local_algorithm, options, local_options, - convert.(NLoptConstraint, equality_constraints), - convert.(NLoptConstraint, inequality_constraints), + constraints(equality_constraints), + constraints(inequality_constraints), ) end @@ -150,10 +161,7 @@ function SEM.fit( start_params::AbstractVector; kwargs..., ) - - # construct the NLopt problem - opt = construct_NLopt_problem(optim.algorithm, optim.options, length(start_params)) - set_NLopt_constraints!(opt, optim) + opt = construct_NLopt(optim.algorithm, optim.options, nparams(model)) opt.min_objective = (par, G) -> SEM.evaluate!( zero(eltype(par)), @@ -162,13 +170,16 @@ function SEM.fit( model, par, ) + for (f, tol) in optim.inequality_constraints + inequality_constraint!(opt, f, tol) + end + for (f, tol) in optim.equality_constraints + equality_constraint!(opt, f, tol) + end if !isnothing(optim.local_algorithm) - opt_local = construct_NLopt_problem( - optim.local_algorithm, - optim.local_options, - length(start_params), - ) + opt_local = + construct_NLopt(optim.local_algorithm, optim.local_options, nparams(model)) opt.local_optimizer = opt_local end @@ -182,7 +193,7 @@ end ### additional functions ############################################################################################ -function construct_NLopt_problem(algorithm, options, npar) +function construct_NLopt(algorithm, options, npar) opt = Opt(algorithm, npar) for (key, val) in pairs(options) @@ -192,15 +203,6 @@ function construct_NLopt_problem(algorithm, options, npar) return opt end -function set_NLopt_constraints!(opt::Opt, optimizer::SemOptimizerNLopt) - for con in optimizer.inequality_constraints - inequality_constraint!(opt, con.f, con.tol) - end - for con in optimizer.equality_constraints - equality_constraint!(opt, con.f, con.tol) - end -end - ############################################################################################ # pretty printing ############################################################################################ diff --git a/ext/SEMNLOptExt/SEMNLOptExt.jl b/ext/SEMNLOptExt/SEMNLOptExt.jl index a159f6dc..61c41338 100644 --- a/ext/SEMNLOptExt/SEMNLOptExt.jl +++ b/ext/SEMNLOptExt/SEMNLOptExt.jl @@ -4,7 +4,7 @@ using StructuralEquationModels, NLopt SEM = StructuralEquationModels -export SemOptimizerNLopt, NLoptConstraint +export SemOptimizerNLopt include("NLopt.jl") diff --git a/test/examples/political_democracy/constraints.jl b/test/examples/political_democracy/constraints.jl index cc1b0874..7a6670fa 100644 --- a/test/examples/political_democracy/constraints.jl +++ b/test/examples/political_democracy/constraints.jl @@ -26,8 +26,8 @@ constrained_optimizer = SemOptimizer(; algorithm = :AUGLAG, local_algorithm = :LD_LBFGS, options = Dict(:xtol_rel => 1e-4), - # equality_constraints = (f = eq_constraint, tol = 1e-14), - inequality_constraints = (f = ineq_constraint, tol = 0.0), + # equality_constraints = (eq_constraint => 1e-14), + inequality_constraints = (ineq_constraint => 0.0), ) @test constrained_optimizer isa SemOptimizer{:NLopt} From c1905040894fab4295fe159919927c49a87694bc Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Fri, 21 Mar 2025 23:34:22 -0700 Subject: [PATCH 18/20] NLopt: update/simplify docs use SemOptimizer(engine = :NLopt) instead of SemOptimizerNLopt() as this is a more universal scheme --- docs/src/tutorials/backends/nlopt.md | 34 ++++++------------- docs/src/tutorials/constraints/constraints.md | 23 
++++++-
 2 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/docs/src/tutorials/backends/nlopt.md b/docs/src/tutorials/backends/nlopt.md
index 2afa5e54..840f3992 100644
--- a/docs/src/tutorials/backends/nlopt.md
+++ b/docs/src/tutorials/backends/nlopt.md
@@ -1,31 +1,21 @@
 # Using NLopt.jl
 
-[`SemOptimizerNLopt`](@ref) implements the connection to `NLopt.jl`.
-It is only available if the `NLopt` package is loaded alongside `StructuralEquationModel.jl` in the running Julia session.
-It takes a bunch of arguments:
+When [`NLopt.jl`](https://github.com/jump-dev/NLopt.jl) is loaded in the running Julia session,
+it can be used by [`SemOptimizer`](@ref) by specifying `engine = :NLopt`
+(see [NLopt-specific options](@ref `SemOptimizerNLopt`)).
+Among other things, `NLopt` enables constrained optimization of SEM models, which is
+explained in the [Constrained optimization](@ref) section.
 
-```julia
-  • algorithm: optimization algorithm
-
-  • options::Dict{Symbol, Any}: options for the optimization algorithm
-
-  • local_algorithm: local optimization algorithm
-
-  • local_options::Dict{Symbol, Any}: options for the local optimization algorithm
-
-  • equality_constraints::Vector{NLoptConstraint}: vector of equality constraints
-
-  • inequality_constraints::Vector{NLoptConstraint}: vector of inequality constraints
-```
-Constraints are explained in the section on [Constrained optimization](@ref).
-
-The defaults are LBFGS as the optimization algorithm and the standard options from `NLopt.jl`.
-We can choose something different:
+We can override the default *NLopt* algorithm (LBFGS) and instead use
+the *augmented lagrangian* method with LBFGS as the *local* optimization algorithm,
+stop at a maximum of 200 evaluations, and use a relative tolerance of
+the objective value of `1e-6` as the stopping criterion for the local algorithm:
 
 ```julia
 using NLopt
 
-my_optimizer = SemOptimizerNLopt(;
+my_optimizer = SemOptimizer(;
+    engine = :NLopt,
     algorithm = :AUGLAG,
     options = Dict(:maxeval => 200),
     local_algorithm = :LD_LBFGS,
@@ -33,8 +23,6 @@ my_optimizer = SemOptimizerNLopt(;
 )
 ```
 
-This uses an augmented lagrangian method with LBFGS as the local optimization algorithm, stops at a maximum of 200 evaluations and uses a relative tolerance of the objective value of `1e-6` as the stopping criterion for the local algorithm.
-
 To see how to use the optimizer to actually fit a model now, check out the [Model fitting](@ref) section.
 
 In the NLopt docs, you can find explanations about the different [algorithms](https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/) and a [tutorial](https://nlopt.readthedocs.io/en/latest/NLopt_Introduction/) that also explains the different options.
 
diff --git a/docs/src/tutorials/constraints/constraints.md b/docs/src/tutorials/constraints/constraints.md
index 930910c6..69a8c07d 100644
--- a/docs/src/tutorials/constraints/constraints.md
+++ b/docs/src/tutorials/constraints/constraints.md
@@ -64,7 +64,8 @@ Let's introduce some constraints:
 
 (Of course those constraints only serve an illustrative purpose.)
 
-We first need to get the indices of the respective parameters that are invoved in the constraints.
+To fit the SEM model with the functional constraints, we will use the *NLopt* optimization engine.
+Since *NLopt* does not have access to the SEM parameter names, we have to look up the indices of the respective parameters that are involved in the constraints.
 We can look up their labels in the output above, and retrieve their indices as
 
 ```@example constraints
@@ -115,16 +116,17 @@ If the algorithm needs gradients at an iteration, it will pass the vector `gradient`.
 With `if length(gradient) > 0` we check if the algorithm needs gradients, and if it does, we fill the `gradient` vector with the gradients
 of the constraint w.r.t. the parameters.
 
-In NLopt, vector-valued constraints are also possible, but we refer to the documentation for that.
+In *NLopt*, vector-valued constraints are also possible, but we refer to the documentation for that.
 
 ### Fit the model
 
-We now have everything together to specify and fit our model. First, we specify our optimizer backend as
+Now we can construct the *SemOptimizer* that will use the *NLopt* engine for constrained optimization.
 
 ```@example constraints
 using NLopt
 
-constrained_optimizer = SemOptimizerNLopt(
+constrained_optimizer = SemOptimizer(
+    engine = :NLopt,
     algorithm = :AUGLAG,
     options = Dict(:upper_bounds => upper_bounds, :xtol_abs => 1e-4),
     local_algorithm = :LD_LBFGS,
@@ -133,7 +135,7 @@ constrained_optimizer = SemOptimizer(
 )
 ```
 
-As you see, the equality constraints and inequality constraints are passed as keyword arguments, and the bounds are passed as options for the (outer) optimization algorithm.
+As you see, the equality and inequality constraints are passed as keyword arguments, and the bounds are passed as options for the (outer) optimization algorithm.
 Additionally, for equality and inequality constraints, a feasibility tolerance can be specified that controls
 if a solution can be accepted, even if it violates the constraints by a small amount.
 Especially for equality constraints, it is recommended to allow for a small positive tolerance.
 In this example, we set both tolerances to `1e-8`.
 
 !!! warning "Convergence criteria"
     We have often observed that the default convergence criteria in NLopt lead to non-convergence flags.
     Indeed, this example does not converge with default criteria.
-    As you see above, we used a realively liberal absolute tolerance in the optimization parameters of 1e-4.
+    As you see above, we used a relatively liberal absolute tolerance in the optimization parameters of 1e-4.
     This should not be a problem in most cases, as the sampling variance in (almost all) structural equation models
    should lead to uncertainty in the parameter estimates that
    are orders of magnitude larger.
     We nonetheless recommend choosing a convergence criterion with care (i.e. w.r.t. the scale of your parameters),
    inspecting the solutions for plausibility, and comparing them to unconstrained solutions.
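As a sketch of the warning's advice (reusing `upper_bounds` and `ineq_constraint` from the example above; the specific tolerance values are illustrative, not recommendations), stricter stopping criteria can be passed via `options`:

```julia
# sketch: explicitly chosen stopping criteria instead of the NLopt defaults
strict_optimizer = SemOptimizer(
    engine = :NLopt,
    algorithm = :AUGLAG,
    local_algorithm = :LD_LBFGS,
    options = Dict(
        :upper_bounds => upper_bounds,
        :xtol_abs => 1e-6,   # tighter than the 1e-4 used above
        :maxeval => 1000,    # cap the number of objective evaluations
    ),
    inequality_constraints = (ineq_constraint => 1e-8),
)
```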
-```@example constraints
-model_constrained = Sem(
-    specification = partable,
-    data = data
-)
+We now have everything to fit our model under constraints:
 
-model_fit_constrained = fit(constrained_optimizer, model_constrained)
+```@example constraints
+model_fit_constrained = fit(constrained_optimizer, model)
 ```
 
 As you can see, the optimizer converged (`:XTOL_REACHED`) and investigating the solution yields

From 134e36e8ff105d47e3efcae15d7eab6b0674477b Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 23:35:11 -0700
Subject: [PATCH 19/20] Optim.md: SemOptimizerOptim => SemOptimizer

---
 docs/src/tutorials/backends/optim.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/src/tutorials/backends/optim.md b/docs/src/tutorials/backends/optim.md
index cf287e77..a16537ec 100644
--- a/docs/src/tutorials/backends/optim.md
+++ b/docs/src/tutorials/backends/optim.md
@@ -1,23 +1,23 @@
 # Using Optim.jl
 
-[`SemOptimizerOptim`](@ref) implements the connection to `Optim.jl`.
-It takes two arguments, `algorithm` and `options`.
-The defaults are LBFGS as the optimization algorithm and the standard options from `Optim.jl`.
-We can load the `Optim` and `LineSearches` packages to choose something different:
+[Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) is the default optimization engine of *SEM.jl*;
+see [`SemOptimizerOptim`](@ref) for a full list of its parameters.
+It defaults to LBFGS, but we can load the `Optim` and `LineSearches` packages
+and specify BFGS (!not L-BFGS) with a back-tracking linesearch and a Hager-Zhang initial step length guess:
 
 ```julia
 using Optim, LineSearches
 
-my_optimizer = SemOptimizerOptim(
+my_optimizer = SemOptimizer(
     algorithm = BFGS(
-        linesearch = BackTracking(order=3),
+        linesearch = BackTracking(order=3),
         alphaguess = InitialHagerZhang()
-    ),
-    options = Optim.Options(show_trace = true)
-    )
+    ),
+    options = Optim.Options(show_trace = true)
+)
 ```
 
-This optimizer will use BFGS (!not L-BFGS) with a back tracking linesearch and a certain initial step length guess. Also, the trace of the optimization will be printed to the console.
+Note that we used `options` to print the optimization progress to the console.
 
 To see how to use the optimizer to actually fit a model now, check out the [Model fitting](@ref) section.

From 0c7d594fd8a4a821855c6d39b242e9f89d84b552 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Fri, 21 Mar 2025 23:37:20 -0700
Subject: [PATCH 20/20] regulariz.md: SemOptimProx => SemOptim

---
 .../regularization/regularization.md | 47 +++++++------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md
index a22601b9..e9655b41 100644
--- a/docs/src/tutorials/regularization/regularization.md
+++ b/docs/src/tutorials/regularization/regularization.md
@@ -5,7 +5,10 @@
 For ridge regularization, you can simply use `SemRidge` as an additional loss function
 (for example, a model with the loss functions `SemML` and `SemRidge`
 corresponds to ridge-regularized maximum likelihood estimation).
-For lasso, elastic net and (far) beyond, you can load the `ProximalAlgorithms.jl` and `ProximalOperators.jl` packages alongside `StructuralEquationModels`:
+For lasso, elastic net and (far) beyond, you can use the [`ProximalOperators.jl`](https://github.com/JuliaFirstOrder/ProximalOperators.jl) package
+and optimize the model with [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl),
+which provides so-called *proximal optimization* algorithms.
+These can handle, amongst other things, various forms of regularization.
 
 ```@setup reg
 using StructuralEquationModels, ProximalAlgorithms, ProximalOperators
@@ -19,16 +22,14 @@
 Pkg.add("ProximalOperators")
 using StructuralEquationModels, ProximalAlgorithms, ProximalOperators
 ```
 
-## `SemOptimizerProximal`
+## Proximal optimization
 
-To estimate regularized models, we provide a "building block" for the optimizer part, called `SemOptimizerProximal`.
-It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms.
-Those can handle, amongst other things, various forms of regularization.
-
-It can be used as
+With the *ProximalAlgorithms* package loaded, it is possible to use the `:Proximal` optimization engine
+of `SemOptimizer` for estimating regularized models.
 
 ```julia
-SemOptimizerProximal(
+SemOptimizer(;
+    engine = :Proximal,
     algorithm = ProximalAlgorithms.PANOC(),
     options = Dict{Symbol, Any}(),
     operator_g,
@@ -37,7 +38,7 @@
 )
 ```
 
 The proximal operator (aka the regularization function) can be passed as `operator_g`; available options are listed [here](https://juliafirstorder.github.io/ProximalOperators.jl/stable/functions/).
-The available Algorithms are listed [here](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/).
+The available algorithms are listed [here](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/).
 
 ## First example - lasso
 
@@ -101,26 +102,18 @@ From the previously linked [documentation](https://juliafirstorder.github.io/Pro
 
 ```@example reg
 λ = zeros(31); λ[ind] .= 0.02
-```
-
-and use `SemOptimizerProximal`.
 
-```@example reg
-optimizer_lasso = SemOptimizerProximal(
+optimizer_lasso = SemOptimizer(
+    engine = :Proximal,
     operator_g = NormL1(λ)
 )
-
-model_lasso = Sem(
-    specification = partable,
-    data = data
-)
 ```
 
 Let's fit the regularized model
 
 ```@example reg
-fit_lasso = fit(optimizer_lasso, model_lasso)
+fit_lasso = fit(optimizer_lasso, model)
 ```
 
 and compare the solution to unregularized estimates:
@@ -135,7 +128,8 @@ update_partable!(partable, :estimate_lasso, param_labels(fit_lasso), solution(fi
 
 details(partable)
 ```
 
-Instead of explicitely defining a `SemOptimizerProximal` object, you can also pass `engine = :Proximal` and additional keyword arguments to `fit`:
+Instead of explicitly defining a `SemOptimizer` object, you can also pass `engine = :Proximal`
+and additional keyword arguments directly to the `fit` function:
 
 ```@example reg
 fit_lasso2 = fit(model; engine = :Proximal, operator_g = NormL1(λ))
 ```
 
 ## Second example - mixed l1 and l0 regularization
 
 You can choose to penalize different parameters with different types of regularization functions.
-Let's use the lasso again on the covariances, but additionally penalyze the error variances of the observed items via l0 regularization.
+Let's use the lasso again on the covariances, but additionally penalize the error variances of the observed items via l0 regularization. The l0 penalty is defined as ```math \lambda \mathrm{nnz}(\theta) ``` -To define a sup of separable proximal operators (i.e. no parameter is penalized twice), +To define a sum of separable proximal operators (i.e. no parameter is penalized twice), we can use [`SlicedSeparableSum`](https://juliafirstorder.github.io/ProximalOperators.jl/stable/calculus/#ProximalOperators.SlicedSeparableSum) from the `ProximalOperators` package: ```@example reg prox_operator = SlicedSeparableSum((NormL0(20.0), NormL1(0.02), NormL0(0.0)), ([ind], [9:11], [vcat(1:8, 12:25)])) -model_mixed = Sem( - specification = partable, - data = data, -) - -fit_mixed = fit(model_mixed; engine = :Proximal, operator_g = prox_operator) +fit_mixed = fit(model; engine = :Proximal, operator_g = prox_operator) ``` Let's again compare the different results:
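The comparison could reuse the `update_partable!` pattern from the lasso example above (the column name `:estimate_mixed` is an assumption, not part of the patch):

```julia
# add the mixed-penalty estimates as a new column next to the other solutions
update_partable!(partable, :estimate_mixed, param_labels(fit_mixed), solution(fit_mixed))

details(partable)
```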