1. Description
How to distribute $n$ vowels in an acoustic space so that the average vowel-to-vowel distance is maximized? In a classical paper, Liljencrants and Lindblom proposed numerically minimizing the energy function
$$E = \sum_{i=1}^{n-1} \sum_{j=i+1}^{n} \frac{1}{(x_i - x_j)^2 + (y_i - y_j)^2}$$
where the $x_i$ stand for the first and the $y_i$ for the second formants (expressed in mel space, with second formants F3-corrected). In the paper, $E$ was minimized in a semi-guided manner by first distributing the $n$ vowels on a circle in the centre of the vowel space and then moving the vowels towards the periphery (defined by a model of the articulators) as long as $E$ was diminishing.
A conceptually simpler way is to throw $n$ vowels into the space at random, and then minimize $E$ by making random improvements to the vowels (stochastic hill climbing). Here I give C++ code for a program that does this, in the hopes that this may be useful in teaching (to illustrate the theory of adaptive dispersion, and also to introduce optimization by way of hill climbing), or even in research. The logic of the program is as follows:
- Distribute $n$ vowels in the two-dimensional space at random.
- For a desired number of iterations:
- Pick a random vowel
- Move this vowel in a random direction
- If vowel ended up outside the articulatorily possible space, try another random direction and move by just half the amount.
- If energy $E$ diminished, leave vowel at this new position; otherwise, move vowel back to its original position
2. Illustration
Here’s an illustration of the operation of the algorithm, with $n=9$ vowels optimized over five thousand iterations. Thanks to advances in modern computing, it took my modest laptop (2.4 GHz, 4GB RAM) just three hundreths of a second to run the optimization (cp. Liljencrants and Lindblom’s original statement: “the computer was left overnight to compute the solutions for $n=3$ to $n=12$” ; p. 842). So this is pretty fast. (A more interesting problem is that the algorithm may get stuck at local optima.)
3. Source code
(download)
/* ad.cpp / Henri Kauhanen 2018
*
* Adaptive dispersion via stochastic hill climbing.
*
* To compile:
* g++ -std=c++11 ad.cpp -o ad
*
* Usage:
* ./ad -n <vowels> -i <iterations> -o <outfile> [-l <logfile>] [-m <movement>]
*
* Original idea:
* Liljencrants, J. & Lindblom, B. (1972) Numerical simulation of vowel
* quality systems: the role of perceptual contrast. Language, 48(4):839-862.
*
*/
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <cmath>
#include <random>
#include <string>
// contour formula
double contour(double M1value, double M1lim[2], double M20) {
return(1150 + (M20 - 1150)*sqrt((M1lim[1] - M1value)/(M1lim[1] - M1lim[0])));
}
// is point x inside the vowel contours?
bool inside(double m1, double m2, double M1lim[2], double M2lim[2]) {
bool ins = true;
if (m1 < M1lim[0] || m1 > M1lim[1]) {
ins = false;
} else if (m2 > contour(m1, M1lim, M2lim[1])) {
ins = false;
} else if (m2 < contour(m1, M1lim, M2lim[0])) {
ins = false;
}
return(ins);
}
// calculate total system energy
double energy(double M1[], double M2[], int n) {
double E = 0;
for (int i=0; i<n-1; i++) {
for (int j=i+1; j<n; j++) {
E = E + 1/(pow(M1[i] - M1[j], 2) + pow(M2[i] - M2[j], 2));
}
}
return(E);
}
// print usage instructions
void usage(char *argv[]) {
fprintf(stderr, "Usage: %s -n <vowels> -i <iterations> -o <outfile> [-l <logfile>] [-m <movement>]\n", argv[0]);
}
// main
int main(int argc, char *argv[]) {
// global variables
int iterations = -1; // number of iterations
int n = -1; // number of vowels
double curr_energy; // current system energy
double log_energy; // current system energy; for logging
int rv; // random vowel
int opt; // getopt
double movement = 10; // movement multiplier
double movehere; // local movement
// vowel trapezium limits, from Liljencrants & Lindblom
double M1lim[2] = {350, 850};
double M2lim[2] = {800, 1700};
// file names and file streams
std::string outfilename = "";
std::string logfilename = "";
std::ofstream outfile;
std::ofstream logfile;
// read command-line arguments
while ((opt = getopt(argc, argv, "i:n:o:l:m:")) != -1) {
switch (opt) {
case 'i':
iterations = atoi(optarg);
break;
case 'n':
n = atoi(optarg);
break;
case 'o':
outfilename = optarg;
break;
case 'l':
logfilename = optarg;
break;
case 'm':
movement = atof(optarg);
break;
default:
usage(argv);
exit(EXIT_FAILURE);
}
}
// check mandatory command-line arguments
if (iterations == -1) {
usage(argv);
exit(EXIT_FAILURE);
}
if (n == -1) {
usage(argv);
exit(EXIT_FAILURE);
}
if (outfilename == "") {
usage(argv);
exit(EXIT_FAILURE);
}
// RNG stuff
std::random_device rd; // seed
std::mt19937 gen(rd()); // Mersenne Twister generator
std::uniform_real_distribution<double> disM1(M1lim[0], M1lim[1]);
std::uniform_real_distribution<double> disM2(M2lim[0], M2lim[1]);
std::uniform_int_distribution<int> disDisc(0,n-1);
std::normal_distribution<double> disNorm(0,1);
// declare dynamic M1 and M2 arrays
double* M1 = new double[n];
double* M2 = new double[n];
// place n vowels randomly inside the contours
for (int i=0; i<n; i++) {
M1[i] = M1lim[0] - 1;
M2[i] = M2lim[0] - 1;
while (!inside(M1[i], M2[i], M1lim, M2lim)) {
M1[i] = disM1(gen);
M2[i] = disM2(gen);
}
}
// main loop
double newM1;
double newM2;
double oldM1;
double oldM2;
double oldmovehere;
if (logfilename != "") {
logfile.open(logfilename);
logfile << "iteration,M1,M2prime,energy\n";
log_energy = energy(M1, M2, n);
for (int i=0; i<n; i++) {
logfile << 0 << "," << M1[i] << "," << M2[i] << "," << log_energy << "\n";
}
}
for (int t=0; t<iterations; t++) {
// find current system energy
curr_energy = energy(M1, M2, n);
// pick random vowel
rv = disDisc(gen);
// move vowel
movehere = movement;
newM1 = M1[rv] + disNorm(gen)*movehere;
newM2 = M2[rv] + disNorm(gen)*movehere;
// still inside contours?
oldmovehere = movehere;
while (!inside(newM1, newM2, M1lim, M2lim)) {
movehere = 0.5*movehere;
newM1 = M1[rv] + disNorm(gen)*movehere;
newM2 = M2[rv] + disNorm(gen)*movehere;
}
movehere = oldmovehere;
// if energy decreased, update
oldM1 = M1[rv];
oldM2 = M2[rv];
M1[rv] = newM1;
M2[rv] = newM2;
if (energy(M1, M2, n) >= curr_energy) {
M1[rv] = oldM1;
M2[rv] = oldM2;
}
// log
if (logfilename != "") {
log_energy = energy(M1, M2, n);
for (int i=0; i<n; i++) {
logfile << t+1 << "," << M1[i] << "," << M2[i] << "," << log_energy << "\n";
}
}
}
// writeout
outfile.open(outfilename);
outfile << "M1,M2prime\n";
for (int i=0; i<n; i++) {
outfile << M1[i] << "," << M2[i] << "\n";
}
// exit
delete[] M1;
delete[] M2;
outfile.close();
if (logfilename != "") {
logfile.close();
}
return 0;
}
4. Compilation and usage
ISO C++ 2011 support is required. To compile with GCC, try:
$ g++ -std=c++11 ad.cpp -o ad
To call the program, use:
$ ./ad -n <vowels> -i <iterations> -o <outfile>
where <vowels>
is the number of vowels to distribute in the space, <iterations>
is the number of hill climbing iterations, and <outfile>
is the name of the output file (a CSV file of vowel formants).
An optional -l
(log) argument is available for tracing the evolution of the hill climbing algorithm iteration by iteration:
$ ./ad -n <vowels> -i <iterations> -o <outfile> -l <logfile>
Finally, the optional -m
argument may be used to control the hill climbing step size:
$ ./ad -n <vowels> -i <iterations> -o <outfile> -l <logfile> -m <M>
A vowel $(x,y)$ is moved to $(x + shM, y + shM)$, where $s$ is sampled from the normal distribution with mean $0$ and standard deviation $1$, and $h$ (initially $h=1$) is halved every time the vowel ends up outside the articulatory bounds. By default, $M$ has the value $M = 10$.
5. R source for illustration
The following code was used to prepare the above animation (avconv required).
(download)
make_animation <- function(logfile,
tmpfolder,
outfile,
fps = 50,
width = 1600,
height = 800,
pointsize = 28) {
grid <- read.csv(logfile)
png(file=paste0(tmpfolder, "/tmp%05d.png"), width=width, height=height, pointsize=pointsize)
par(mfcol=c(1,2))
# plot initial state for a while
for (i in 1:(2*fps)) {
plot_LL_space(grid[grid$iteration==0, ])
plot_energy_until(grid, until_iteration=0)
}
# now set animation in motion
for (i in unique(grid$iteration)) {
plot_LL_space(grid[grid$iteration==i, ])
plot_energy_until(grid, until_iteration=i)
}
# plot final state for a while
for (i in 1:(2*fps)) {
plot_LL_space(grid[grid$iteration==max(grid$iteration), ])
plot_energy_until(grid, until_iteration=max(grid$iteration))
}
dev.off()
system(paste0("avconv -r ", fps, " -i ", tmpfolder, "/tmp%05d.png -qscale 1 ", outfile))
}
plot_LL_space <- function(grid,
pch = 21,
bg = "",
cex = 1.2,
contours = TRUE,
lwd = 1.5,
lwd.cont = 1.5,
M1lim = c(350, 850),
M2lim = c(800, 1700)) {
# prepare plot
plot(M1~M2prime,
grid,
xlim=rev(M2lim),
ylim=rev(M1lim),
xlab="M2'",
ylab="M1",
type="n",
main="Vowel space (mel)")
if (bg == "") {
bg <- rainbow(nrow(grid))
}
# plot contours
if (contours) {
segments(y0=M1lim[1], y1=M1lim[1], x0=M2lim[2], x1=M2lim[1], lwd=lwd.cont)
x <- seq(from=M2lim[2], to=1150)
lines(x, M1lim[2] - (M1lim[2]-M1lim[1])*((x - 1150)/(M2lim[2] - 1150))^2, lwd=lwd.cont)
x <- seq(from=1150, to=M2lim[1])
lines(x, M1lim[2] - (M1lim[2]-M1lim[1])*((x - 1150)/(M2lim[1] - 1150))^2, lwd=lwd.cont)
}
# plot vowels
points(M1~M2prime, grid, pch=pch, cex=cex, lwd=lwd, bg=bg)
}
plot_energy_until <- function(grid,
until_iteration,
lwd = 1.2) {
maxiter <- max(grid$iteration)
maxenergy <- max(grid$energy)
minenergy <- min(grid$energy)
grid <- grid[, c("iteration", "energy")]
grid <- grid[grid$iteration <= until_iteration, ]
grid <- grid[!duplicated(grid), ]
if (until_iteration == 0) {
plot(energy~iteration, grid, type="n", log="xy", xlim=c(1, maxiter), ylim=c(minenergy, maxenergy), lwd=lwd, main="Energy evolution")
} else {
plot(energy~iteration, grid, type="l", log="xy", xlim=c(1, maxiter), ylim=c(minenergy, maxenergy), lwd=lwd, main="Energy evolution")
points(energy~iteration, grid[nrow(grid),], pch=20)
}
text(x=0.8, y=minenergy, pos=4, labels=paste0("iteration: ", until_iteration), cex=0.9)
}