Skip to content

Commit 73c758c

Browse files
authored
Merge pull request #24 from padix-key/optimization
New parameters for the optimizer
2 parents 39cf0f6 + 009ddf1 commit 73c758c

File tree

10 files changed

+149
-119
lines changed

10 files changed

+149
-119
lines changed

LICENSE.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
BSD 3-Clause License
22
--------------------
33

4-
Copyright 2019 - 2021, Patrick Kunzmann, Benjamin Mayer
4+
Copyright 2019 - 2022, Patrick Kunzmann, Benjamin Mayer
55
All rights reserved.
66

77
Redistribution and use in source and binary forms, with or without modification,

doc/cli.rst

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,15 @@ In order to increase the quality of the scheme tha amount of optimization steps
7171
(``--nsteps``) or the number of runs (``--nruns``) can be increased.
7272
However, increasing these values also extends the runtime of the optimization.
7373
Note that ``--nruns`` can take advantage of multiple cores.
74+
The number of used cores is set with ``--nthreads``
7475

7576
The simulated annealing can be adjusted even more fine grained by setting
76-
the initial reverse temperature (``--beta``) and the rate of its exponential
77-
growth (``--rate``). The step size decreases in the course of the simulated
77+
the inverse temperature at the first (``--beta-start``) and last
78+
(``--beta-end``) step of the optimization.
79+
For the steps in between the inverse temperature is interpolated exponentially.
80+
The step size decreases in the course of the simulated
7881
annealing also in an exponential manner, which can be parameterized via
79-
``--step-size-start`` and ``--step-size-end``.
82+
``--stepsize-start`` and ``--stepsize-end``.
8083
The seed for the random number generator used by the algorithm is set with
8184
the ``--seed`` option.
8285
However, these parameters address the more advanced users.

doc/plotgen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def plot_generator(function):
6565
def plot_main_example_alignment():
6666
scheme_file = biotite.temp_file("json")
6767
gecli.main(args=[
68-
"--seed", "0",
68+
"--seed", "3",
6969
"--matrix", "BLOSUM62",
7070
"--lmin", "60",
7171
"--lmax", "75",

doc/theory.rst

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,7 @@ an exponential schedule for the step size
173173
The step size is used for perturbing the current solution in each step of the
174174
SA algorithm to find a new candidate solution.
175175
So the idea for using the schedule here is to start with relatively large
176-
step size :math:`\delta_{start}` and to chose the rate according to an
177-
target step size :math:`\delta_{end}`.
178-
An according rate is easily derived by claiming
179-
:math:`\delta(N_{max})=\delta_{end}` which leads to
180-
181-
.. math:: \gamma = \frac{1}{N_{max}}\log \left( \frac{\delta_{end}}{\delta_{start}} \right).
176+
step size and reduce it over the course of the simulation.
182177

183178

184179
Monte-Carlo algorithm

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "gecos"
3-
version = "1.3.0"
3+
version = "2.0.0"
44
description = "Generated color schemes for sequence alignment visualizations"
55
readme = "README.rst"
66
license = "BSD-3-Clause"

src/gecos/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
33
# information.
44

5-
__version__ = "1.3.0"
5+
__version__ = "2.0.0"
66
__author__ = "Patrick Kunzmann"
77

88
from .colors import *

src/gecos/cli.py

Lines changed: 42 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from multiprocessing import Pool
22
from os.path import join, dirname, realpath, isfile
3+
import itertools
34
import argparse
45
import copy
56
import sys
@@ -37,22 +38,18 @@ class InputError(Exception):
3738
pass
3839

3940

40-
def _optimize(optline):
41+
def _optimize(seed, alphabet, score_func, space, constraints,
42+
nsteps, beta_start, beta_end, stepsize_start, stepsize_end):
4143
"""
42-
Worker function used for parallel execution of simulated annealing
43-
optimizer with optline being a tuple containing the needed data
44+
Worker function used for parallel execution of simulated annealing.
4445
"""
45-
(
46-
alphabet, score_func, space, constraints,
47-
nsteps, beta, rate, step_size_start, step_size_end,
48-
seed
49-
) = optline
50-
5146
np.random.seed(seed)
5247
optimizer = ColorOptimizer(
5348
alphabet, score_func, space, constraints
5449
)
55-
optimizer.optimize(nsteps, beta, rate, step_size_start, step_size_end)
50+
optimizer.optimize(
51+
nsteps, beta_start, beta_end, stepsize_start, stepsize_end
52+
)
5653
return optimizer.get_result()
5754

5855
@handle_error
@@ -201,29 +198,35 @@ def main(args=None, result_container=None, show_plots=True):
201198
)
202199
opt_group.add_argument(
203200
"--nruns", default=16, type=int,
204-
help="Number of parallel optimization algorithms runs that are to be "
205-
"executed.",
201+
help="Number of optimizations to run. "
202+
"From these runs, the color scheme with the best score is "
203+
"selected.",
204+
metavar="NUMBER"
205+
)
206+
opt_group.add_argument(
207+
"--nthreads", type=int,
208+
help="Number of optimization runs to run in parallel. "
209+
"By default this is equal to the number of CPUs.",
206210
metavar="NUMBER"
207211
)
208212
opt_group.add_argument(
209-
"--beta", default=1e-7, type=float,
210-
help="Inverse start temperature for simulated annealing algorithm.",
213+
"--beta-start", default=1, type=float,
214+
help="Inverse temperature at start of simulated annealing.",
211215
metavar="FLOAT",
212216
)
213217
opt_group.add_argument(
214-
"--rate", default=1, type=float,
215-
help="Rate controlling the exponential annealing schedule, "
216-
"for the annealing of the inverse temperature.",
218+
"--beta-end", default=500, type=float,
219+
help="Inverse temperature at end of simulated annealing.",
217220
metavar="FLOAT",
218221
)
219222
opt_group.add_argument(
220-
"--step-size-start", default=20, type=float,
221-
help="Start step size for simulated annealing algorithm.",
223+
"--stepsize-start", default=10, type=float,
224+
help="Step size temperature at start of simulated annealing.",
222225
metavar="FLOAT",
223226
)
224227
opt_group.add_argument(
225-
"--step-size-end", default=0.1, type=float,
226-
help="End step size for simulated annealing algorithm.",
228+
"--stepsize-end", default=0.2, type=float,
229+
help="Step size temperature at end of simulated annealing.",
227230
metavar="FLOAT",
228231
)
229232

@@ -318,30 +321,26 @@ def main(args=None, result_container=None, show_plots=True):
318321

319322
score_func = DefaultScoreFunction(matrix, args.contrast, args.delta)
320323

321-
# Simulated annealing
322-
n_parallel = args.nruns
324+
# Simulated annealing
323325
if args.seed is not None:
324326
np.random.seed(int(args.seed))
325-
# Different randomly seed for each run
326-
seeds = np.random.randint(0, 1000000, size=n_parallel)
327-
opt_data = [
328-
(
329-
matrix.get_alphabet1(),
330-
score_func,
331-
space,
332-
constraints,
333-
args.nsteps,
334-
args.beta,
335-
args.rate,
336-
args.step_size_start,
337-
args.step_size_end,
338-
seeds[i])
339-
for i in range(n_parallel)
340-
]
341-
342-
with Pool(n_parallel) as p:
343-
results = p.map(_optimize, opt_data)
344-
best_result = sorted(results, key=lambda x: x.score)[0]
327+
# Different random seed for each run
328+
seeds = np.random.randint(0, 1000000, size=args.nruns)
329+
330+
with Pool(args.nthreads) as p:
331+
results = p.starmap(_optimize, zip(
332+
seeds,
333+
itertools.repeat(matrix.get_alphabet1()),
334+
itertools.repeat(score_func),
335+
itertools.repeat(space),
336+
itertools.repeat(constraints),
337+
itertools.repeat(args.nsteps),
338+
itertools.repeat(args.beta_start),
339+
itertools.repeat(args.beta_end),
340+
itertools.repeat(args.stepsize_start),
341+
itertools.repeat(args.stepsize_end),
342+
))
343+
best_result = min(results, key=lambda x: x.score)
345344

346345
scores = np.array([result.scores for result in results])
347346
scores_mean = np.mean(scores, axis=0)

src/gecos/optimizer.py

Lines changed: 49 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -166,78 +166,62 @@ def _set_coordinates(self, coord, score=None):
166166
score = self._score_func(coord)
167167
self._scores.append(score)
168168

169-
def optimize(self, n_steps,
170-
beta_start, rate_beta, stepsize_start, stepsize_end):
169+
def optimize(self, n_steps=20000,
170+
beta_start=1, beta_end=500,
171+
stepsize_start=10, stepsize_end=0.2):
171172
r"""
172-
Perform a Simulated Annealing optimization on the current
173+
Perform a simulated annealing optimization on the current
173174
coordinate to minimize the score returned by the score function.
174175
175-
This is basically a Monte-Carlo optimization where the
176-
temperature is varied according to a so called annealing
177-
schedule over the course of the optimization.
176+
This is basically a Metropolis-Monte-Carlo optimization where
177+
the inverse temperature and step size is varied according to
178+
exponential annealing schedule over the course of the
179+
optimization.
180+
181+
Parameters
182+
----------
183+
n_steps : int
184+
The number of simulated annealing steps.
185+
beta_start, beta_end : float
186+
The inverse temperature in the first and last step of the
187+
optimization, respectively.
188+
Higher values allow less increase of score, i.e. result
189+
in a steering into the local minimum.
190+
Must be positive.
191+
stepsize_start, stepsize_end : float
192+
The step size in the first and last step of the
193+
optimization, respectively.
194+
it is the radius in which the coordinates are randomly
195+
altered at in each optimization step.
196+
Must be positive.
197+
198+
Notes
199+
-----
178200
The algorithm is a heuristic thats motivated by the physical
179201
process of annealing.
180202
If we, e.g., cool steel than a slow cooling can yield a superior
181203
quality, whereas for a fast cooling the steel can become
182204
brittle.
183205
The same happens here within the search space for the given
184-
minimization task.
185-
186-
Parameters
187-
----------
188-
n_steps : int
189-
The number of Simulated-Annealing steps.
190-
beta_start : float
191-
The inverse start temperature, where the start temperature
192-
would be :math:`T_{start} = 1/(k_b \cdot \beta_{start})` with
193-
:math:`k_b` being the boltzmann constant.
194-
rate_beta: float
195-
The rate controls how fast the inverse temperature is
196-
increased within the annealing schedule.
197-
Here the exponential schedule is chosen so we have
198-
:math:`\beta (t) = \beta_0 \cdot \exp(rate \cdot t)`.
199-
stepsize_start : float
200-
The radius in which the coordinates are randomly altered at
201-
the beginning of the simulated anneling algorithm.
202-
Like the inverse temperature the step size follows an
203-
exponential schedule, enabling the algorithm
204-
to do large perturbartions at the beginning of the algorithm
205-
run and increasingly smaller ones afterwards.
206-
stepsize_end : float
207-
The radius in which the coordinates are randomly altered at
208-
the end of the simulated annealing algorithm run.
206+
minimization task.
209207
"""
210-
# Calculate the max value 'i' can reach so that
211-
# 'np.exp(rate_beta*i)' does not overflow
212-
max_i = np.log(np.finfo(np.float64).max) / rate_beta
213-
beta = lambda i: beta_start*np.exp(rate_beta*i) \
214-
if i < max_i else np.inf
215-
216-
# Choose rate so that stepsize_end reached after n_steps
217-
# derived from step_size(N_steps) = steps_end
218-
if stepsize_start == stepsize_end:
219-
rate_stepsize = 0
220-
else:
221-
rate_stepsize = np.log(stepsize_end / stepsize_start) / n_steps
222-
step_size = lambda i: stepsize_start * np.exp(rate_stepsize * i)
208+
betas = _calculate_schedule(n_steps, beta_start, beta_end)
209+
stepsizes = _calculate_schedule(n_steps, stepsize_start, stepsize_end)
223210

224211
for i in range(n_steps):
225-
226212
score = self._scores[-1]
227213
new_coord = self._sample_coord(
228214
self._coord,
229-
lambda c: c + (random.rand(*c.shape)-0.5) * 2 * step_size(i)
215+
lambda c: c + (random.rand(*c.shape)-0.5) * 2 * stepsizes[i]
230216
)
231217
new_score = self._score_func(new_coord)
232218

233219
if new_score < score:
234220
self._set_coordinates(new_coord, new_score)
235221

236222
else:
237-
p_accept = np.exp( -beta(i) * (new_score-score))
238-
p = random.rand()
239-
240-
if p <= p_accept:
223+
p_accept = np.exp( -betas[i] * (new_score-score))
224+
if random.rand() <= p_accept:
241225
self._set_coordinates(new_coord, new_score)
242226
else:
243227
self._set_coordinates(self._coord, score)
@@ -292,6 +276,15 @@ def _apply_constraints(self, coord):
292276
coord[self._constraint_mask] = self._constraints[self._constraint_mask]
293277

294278

279+
def _calculate_schedule(n_steps, start, end):
280+
"""
281+
Calculate the values for each step in an exponential schedule.
282+
"""
283+
# Use float 64
284+
return start * (end/start)**np.linspace(0, 1, n_steps, dtype=np.float64)
285+
286+
287+
295288
class ScoreFunction(metaclass=abc.ABCMeta):
296289
"""
297290
Abstract base class for a score function.
@@ -396,19 +389,18 @@ def __call__(self, coord):
396389
@staticmethod
397390
def _calculate_distances(tri_indices, coord, distance_formula):
398391
ind1, ind2 = tri_indices
399-
if distance_formula == "CIEDE76":
400-
dist = skimage.color.deltaE_ciede94(
401-
coord[ind1], coord[ind2]
392+
if distance_formula == "CIE76":
393+
return np.sqrt(
394+
np.sum((coord[ind1, :] - coord[ind2, :])**2, axis=-1)
402395
)
403396
elif distance_formula == "CIEDE94":
404-
dist = skimage.color.deltaE_cie76(
397+
return skimage.color.deltaE_ciede94(
405398
coord[ind1], coord[ind2]
406399
)
407400
else: #"CIEDE2000"
408-
dist = skimage.color.deltaE_ciede2000(
401+
return skimage.color.deltaE_ciede2000(
409402
coord[ind1], coord[ind2]
410403
)
411-
return dist
412404

413405
@staticmethod
414406
def _calculate_ideal_distances(tri_indices, substitution_matrix):
@@ -419,4 +411,4 @@ def _calculate_ideal_distances(tri_indices, substitution_matrix):
419411
distances = dist_matrix[ind_i, ind_j]
420412
# Scale, so that average distance is 1
421413
distances /= np.average(distances)
422-
return distances
414+
return distances

0 commit comments

Comments
 (0)