
Commit 334683a

Merge pull request #36 from SciNim/addCurveFitUncertainties
add chi2 + add uncertainties to levmarq
2 parents 9d2f6ab + fd4cd18 commit 334683a

5 files changed: +206 -14 lines changed

numericalnim.nimble

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Package Information
-version = "0.8.5"
+version = "0.8.6"
 author = "Hugo Granström"
 description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
 license = "MIT"

src/numericalnim/optimize.nim

Lines changed: 150 additions & 12 deletions
@@ -1,6 +1,8 @@
 import std/[strformat, sequtils, math, deques]
 import arraymancer
-import ./differentiate
+import
+  ./differentiate,
+  ./utils

 when not defined(nimHasEffectsOf):
   {.pragma: effectsOf.}
@@ -126,9 +128,9 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
       raise newException(ArithmeticError, "Maximum iterations for Secant method exceeded")
   return xCurrent

-##############################
-## Multidimensional methods ##
-##############################
+# ######################## #
+# Multidimensional methods #
+# ######################## #

 type LineSearchCriterion* = enum
   Armijo, Wolfe, WolfeStrong, NoLineSearch
@@ -146,36 +148,71 @@ type
   LBFGSOptions*[U] = object
     savedIterations*: int

-proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a vanilla OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
-  result.lambda0 = lambda0
   result.fastMode = fastMode
   result.maxIterations = maxIterations
   result.lineSearchCriterion = lineSearchCriterion

 proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a Steepest Descent OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
   result.maxIterations = maxIterations
   result.lineSearchCriterion = lineSearchCriterion

 proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a Newton OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
   result.maxIterations = maxIterations
   result.lineSearchCriterion = lineSearchCriterion

 proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a BFGS OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
   result.maxIterations = maxIterations
   result.lineSearchCriterion = lineSearchCriterion

 proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LBFGSOptions[U]] =
+  ## Returns a LBFGS OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
+  ## - savedIterations: Number of past iterations to save. The higher the value, the better but slower the steps.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -184,6 +221,14 @@ proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(
   result.algoOptions.savedIterations = savedIterations

 proc levmarqOptions*[U](lambda0: U = U(1), tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LevMarqOptions[U]] =
+  ## Returns a levmarq OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
+  ## - lambda0: Starting value of the dampening parameter.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -253,7 +298,15 @@ template analyticOrNumericGradient(analytic, f, x, options: untyped): untyped =
     analytic(x)

 proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = steepestDescentOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
-  ## Minimize scalar-valued function f.
+  ## Steepest descent method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `steepestDescentOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x0))
@@ -275,6 +328,15 @@ proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
   result = x

 proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = newtonOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## Newton's method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `newtonOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x))
@@ -342,6 +404,15 @@ proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha:
   result = x

 proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## BFGS (Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `bfgsOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   # Use gemm and gemv with preallocated Tensors and setting beta = 0
   var alpha = options.alpha
   var x = x0.clone()
@@ -421,15 +492,24 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: O
   #echo iters, " iterations done!"
   result = x

-proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## LBFGS (Limited-memory Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `lbfgsOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
   var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options)
   var gradNorm = vectorNorm(gradient)
   var iters: int
-  #let m = 10 # number of past iterations to save
+  let m = options.algoOptions.savedIterations # number of past iterations to save
   var sk_queue = initDeque[Tensor[U]](m)
   var yk_queue = initDeque[Tensor[T]](m)
   # the problem is the first iteration as the gradient is huge and no adjustments are made
@@ -475,7 +555,20 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int =
   #echo iters, " iterations done!"
   result = x

-proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U] =
+proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U](), yError: Tensor[T] = ones_like(yData)): Tensor[U] =
+  ## Levenberg-Marquardt for non-linear least squares solving. In other words, it fits the parameters of a function to data samples.
+  ##
+  ## Inputs:
+  ## - f: The function you want to fit the data to. The first argument should be a 1D Tensor with the values of the parameters
+  ##   and the second argument is the value of the independent variable to evaluate the function at.
+  ## - params0: The starting guess for the parameter values as a 1D Tensor.
+  ## - yData: The measured values of the dependent variable as 1D Tensor.
+  ## - xData: The values of the independent variable as 1D Tensor.
+  ## - options: Object with all the options like `tol` and `lambda0` (see `levmarqOptions`).
+  ## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviations.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   assert xData.rank == 1
   assert yData.rank == 1
   assert params0.rank == 1
@@ -486,8 +579,8 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te

   let residualFunc = # proc that returns the residual vector
     proc (params: Tensor[U]): Tensor[T] =
-      result = map2_inline(xData, yData):
-        f(params, x) - y
+      result = map3_inline(xData, yData, yError):
+        (f(params, x) - y) / z

   let errorFunc = # proc that returns the scalar error
     proc (params: Tensor[U]): T =
@@ -524,6 +617,51 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te
   result = params


+proc inv[T](t: Tensor[T]): Tensor[T] =
+  result = solve(t, eye[T](t.shape[0]))
+
+proc getDiag[T](t: Tensor[T]): Tensor[T] =
+  let n = t.shape[0]
+  result = newTensor[T](n)
+  for i in 0 ..< n:
+    result[i] = t[i,i]
+
+proc paramUncertainties*[U; T](params: Tensor[U], fitFunc: proc(params: Tensor[U], x: U): T, yData: Tensor[T], xData: Tensor[U], yError: Tensor[T], returnFullCov = false): Tensor[T] =
+  ## Returns the whole covariance matrix or only the diagonal elements for the parameters in `params`.
+  ##
+  ## Inputs:
+  ## - params: The parameters in a 1D Tensor that the uncertainties are wanted for.
+  ## - fitFunc: The function used for fitting the parameters (see `levmarq` for more).
+  ## - yData: The measured values of the dependent variable as 1D Tensor.
+  ## - xData: The values of the independent variable as 1D Tensor.
+  ## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviations.
+  ## - returnFullCov: If true, the full covariance matrix will be returned as a 2D Tensor, else only the diagonal elements will be returned as a 1D Tensor.
+  ##
+  ## Returns:
+  ##
+  ## The uncertainties of the parameters in the form of a covariance matrix (or only the diagonal elements).
+  ##
+  ## Note: it is the covariance that is returned, so if you want the standard deviation you have to
+  ## take the square root of it.
+  proc fError(params: Tensor[U]): T =
+    let yCurve = xData.map_inline:
+      fitFunc(params, x)
+    result = chi2(yData, yCurve, yError)
+
+  let dof = xData.size - params.size
+  let sigma2 = fError(params) / T(dof)
+  let H = tensorHessian(fError, params)
+  let cov = sigma2 * H.inv()
+
+  if returnFullCov:
+    result = cov
+  else:
+    result = cov.getDiag()
+
+
+
+
+

 when isMainModule:
   import benchy
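Taken together, the changes above make `levmarq` a weighted least-squares fit (each residual is divided by its `yError`) and add `paramUncertainties` for error estimates on the fitted parameters. Below is a minimal usage sketch: the straight-line model, the synthetic data and the noise level are hypothetical illustrations and not part of this commit, while the call signatures follow the diff above and the tests further down.

import arraymancer
import numericalnim

# Hypothetical model: y = a*x + b, with the parameters packed as [a, b].
proc fitFunc(params: Tensor[float], x: float): float =
  params[0] * x + params[1]

# Synthetic, noise-free samples of y = 3.5*x - 1.0 (illustration only).
var xs = newSeq[float](50)
for i in 0 ..< 50:
  xs[i] = i.float * 0.2
let xData = xs.toTensor
let yData = xData.map_inline:
  3.5 * x - 1.0
let yError = ones_like(yData) * 1e-2   # assumed 1σ uncertainty of each sample
let params0 = [1.0, 0.0].toTensor      # starting guess

let paramsSol = levmarq(fitFunc, params0, xData, yData, yError = yError)
# paramUncertainties returns the covariance diagonal; its square root gives the 1σ errors.
let paramSigmas = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()
echo paramsSol
echo paramSigmas

With noise-free data the estimated uncertainties come out near zero, since the covariance is scaled by χ²/dof as shown in `paramUncertainties` above.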

src/numericalnim/utils.nim

Lines changed: 16 additions & 0 deletions
@@ -303,6 +303,22 @@ proc hermiteInterpolate*[T](x: openArray[float], t: openArray[float],
   raise newException(ValueError, &"{a} not in interval {min(t)} - {max(t)}")


+
+proc chi2*[T](yData, yFit, yError: seq[T] or Tensor[T]): T =
+  when yData is Tensor:
+    assert yData.rank == 1
+    assert yFit.rank == 1
+    assert yError.rank == 1
+    assert yData.size == yFit.size
+    assert yFit.size == yError.size
+    let N = yData.size
+  else:
+    let N = yData.len
+  result = T(0)
+  for i in 0 ..< N:
+    let temp = (yData[i] - yFit[i]) / yError[i]
+    result += temp * temp
+
 proc delete*[T](s: var seq[T], idx: seq[int]) =
   ## Deletes the elements of seq s at indices idx.
   ## idx must not contain duplicates!
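For reference, the quantity this new `chi2` helper accumulates is the usual weighted sum of squared residuals,

\chi^2 = \sum_{i=1}^{N} \left( \frac{y_{\mathrm{data},i} - y_{\mathrm{fit},i}}{\sigma_i} \right)^2

with σ_i = `yError[i]`. If the errors are estimated correctly, a good fit gives a reduced χ² (χ² divided by the degrees of freedom) close to 1, and `paramUncertainties` uses exactly this quantity to scale the inverse Hessian into a covariance matrix.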

tests/test_optimize.nim

Lines changed: 15 additions & 0 deletions
@@ -144,4 +144,19 @@ suite "Multi-dim":
     for x in abs(paramsSol - correctParams):
       check x < 1.3e-3

+  test "levmarq with yError":
+    let yError = ones_like(yData) * 1e-2
+    let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
+    for x in abs(paramsSol - correctParams):
+      check x < 1.3e-3
+
+  test "paramUncertainties":
+    let yError = ones_like(yData) * 1e-2
+    let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
+
+    let uncertainties = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()
+
+    for (unc, err) in zip(uncertainties, abs(paramsSol - correctParams)):
+      check abs(unc / err) in 0.79 .. 3.6
+
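If the parameter correlations are needed as well, the full covariance matrix can be requested instead of only its diagonal. A sketch reusing the names from the test above, with the `returnFullCov` flag added in this commit:

let cov = paramUncertainties(paramsSol, fitFunc, yData, xData, yError, returnFullCov = true)
# cov is a 2D Tensor: cov[i, i] holds the variance of parameter i, the off-diagonal entries the covariances.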

tests/test_utils.nim

Lines changed: 24 additions & 1 deletion
@@ -1,4 +1,4 @@
-import unittest, math, sequtils, algorithm
+import unittest, math, sequtils, algorithm, random
 import arraymancer
 import ./numericalnim

@@ -91,3 +91,26 @@ test "meshgrid":
   let grid = meshgrid(x, y, z)
   check grid == [[0, 2, 4], [1, 2, 4], [0, 3, 4], [1, 3, 4], [0, 2, 5], [1, 2, 5], [0, 3, 5], [1, 3, 5]].toTensor

+test "chi2 Tensor":
+  randomize(1337)
+  let N = 1000
+  let sigma = 1.23
+  let yMeasure = newSeqWith(N, gauss(0.0, sigma)).toTensor
+  let yCorrect = zeros[float](N)
+  let yError = ones[float](N) * sigma
+  let chi = chi2(yMeasure, yCorrect, yError)
+  # Check that the mean χ² is around 1
+  check chi / N.float in 0.90 .. 1.1
+
+test "chi2 Seq":
+  randomize(1337)
+  let N = 1000
+  let sigma = 1.23
+  let yMeasure = newSeqWith(N, gauss(0.0, sigma))
+  let yCorrect = newSeqWith(N, 0.0)
+  let yError = newSeqWith(N, sigma)
+  let chi = chi2(yMeasure, yCorrect, yError)
+  # Check that the mean χ² is around 1
+  check chi / N.float in 0.90 .. 1.1
+
+
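A note on the tolerance band used in these tests (standard χ² statistics, not part of the commit): for N independent, correctly scaled Gaussian residuals the χ² sum has mean N and variance 2N, so

\frac{\chi^2}{N} \approx 1 \pm \sqrt{2/N} \quad (1\sigma), \qquad \sqrt{2/1000} \approx 0.045

which makes the accepted window 0.90 .. 1.1 a bit over two standard deviations on either side; with the fixed seed the result is deterministic in any case.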
