/* The Analysis & Resynthesis Sound Spectrograph Copyright (C) 2005-2008 Michel Rouzic This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
#include "dsp.h"
void fft(double *in, double *out, int32_t N, uint8_t method)
{
/* method :
* 0 = DFT
* 1 = IDFT
* 2 = DHT
*/
fftw_plan p = fftw_plan_r2r_1d(N, in, out, method, FFTW_ESTIMATE);
fftw_execute(p);
fftw_destroy_plan(p);
}
void normi(double **s, int32_t xs, int32_t ys, double ratio) // normalises a signal to the +/-ratio range
{
int32_t ix, iy, maxx, maxy;
double max;
max=0;
for (iy=0; iy<ys; iy++)
for (ix=0; ix<xs; ix++)
if (fabs(s[iy][ix])>max)
{
max=fabs(s[iy][ix]);
maxx=ix;
maxy=iy;
}
#ifdef DEBUG
printf("norm : %.3f (Y:%i X:%i)\n", max, maxy, maxx);
#endif
if (max!=0.0)
{
max /= ratio;
max = 1.0/max;
}
else
max = 0.0;
for (iy=0; iy<ys; iy++)
for (ix=0; ix<xs; ix++)
s[iy][ix]*=max;
#ifdef DEBUG
printf("ex : %.3f\n", s[0][0]);
#endif
}
double log_pos(double x, double min, double max) // turns a logarithmic position (i.e. band number/band count) to a frequency
{
if (LOGBASE==1.0)
return x*(max-min) + min;
else
return (max-min) * (min * pow(LOGBASE, x * (log(max)-log(min))/log(2.0)) - min) / (min * pow(LOGBASE, (log(max)-log(min))/log(2.0)) - min) + min;
}
double log_pos_inv(double x, double min, double max) // turns a frequency to a logarithmic position (i.e. band number/band count)
{
if (LOGBASE==1.0)
return (x - min)/(max-min);
else
return log(((x-min) * (min * pow(LOGBASE, (log(max) - log(min))/log(2.0)) - min) / (max-min) + min) / log(LOGBASE)) * log(2.0) / (log(max) - log(min));
}
double *freqarray(double basefreq, int32_t bands, double bandsperoctave)
{
int32_t i;
double *freq, maxfreq;
freq=malloc (sizeof(double) * bands);
if (LOGBASE==1.0)
maxfreq = bandsperoctave; // in linear mode we use bpo to store the maxfreq since we couldn't deduce maxfreq otherwise
else
maxfreq = basefreq * pow(LOGBASE, ((double) (bands-1)/ bandsperoctave));
for (i=0;i<bands;i++)
{
freq[i] = log_pos((double) i/(double) (bands-1), basefreq, maxfreq); //band's central freq
}
if (log_pos((double) bands / (double) (bands-1), basefreq, maxfreq)>0.5)
printf("Warning: Upper frequency limit above Nyquist frequency\n"); // TODO change sampling rate instead
return freq;
}
double *blackman_downsampling(double *in, int32_t Mi, int32_t Mo) // Downsampling of the signal by a Blackman function
{
int32_t i, j; // general purpose iterators
double *out;
double pos_in; // position in the original signal
double x; // position of the iterator in the blackman(x) formula
double ratio; // scaling ratio (> 1.0)
double ratio_i; // ratio^-1
double coef; // Blackman coefficient
double coef_sum; // sum of coefficients, used for weighting
/*
* Mi is the original signal's length
* Mo is the output signal's length
*/
ratio = (double) Mi/Mo;
ratio_i = 1.0/ratio;
out = calloc (Mo, sizeof(double));
for (i=0; i<Mo; i++)
{
pos_in = (double) i * ratio;
coef_sum = 0;
for (j=roundup(pos_in - ratio); j<=pos_in + ratio; j++)
{
if (j>=0 && j<Mi) // if the read sample is within bounds
{
x = j - pos_in + ratio; // calculate position within the Blackman function
coef = 0.42 - 0.5*cos(pi * x * ratio_i) + 0.08*cos(2*pi * x * ratio_i);
coef_sum += coef;
out[i] += in[j] * coef; // convolve
}
}
out[i] /= coef_sum;
}
return out;
}
double *bmsq_lut(int32_t size) // Blackman Square look-up table generator
{
int32_t i; // general purpose iterator
double coef; // Blackman square final coefficient
double bar = pi * (3.0 / (double) size) * (1.0/1.5);
double foo;
double f1 = -0.6595044010905501; // Blackman Square coefficients
double f2 = 0.1601741366715479;
double f4 = -0.0010709178680006;
double f5 = 0.0001450093579222;
double f7 = 0.0001008528049040;
double f8 = 0.0000653092892874;
double f10 = 0.0000293385615146;
double f11 = 0.0000205351559060;
double f13 = 0.0000108567682890;
double f14 = 0.0000081549460136;
double f16 = 0.0000048519309366;
double f17 = 0.0000038284344102;
double f19 = 0.0000024753630724;
size++; // allows to read value 3.0
double *lut = calloc (size, sizeof(double));
for (i=0; i<size; i++)
{
foo = (double) i * bar;
coef = 0;
coef += cos( foo) * f1 - f1;
coef += cos( 2.0 * foo) * f2 - f2;
coef += cos( 4.0 * foo) * f4 - f4;
coef += cos( 5.0 * foo) * f5 - f5;
coef += cos( 7.0 * foo) * f7 - f7;
coef += cos( 8.0 * foo) * f8 - f8;
coef += cos(10.0 * foo) * f10 - f10;
coef += cos(11.0 * foo) * f11 - f11;
coef += cos(13.0 * foo) * f13 - f13;
coef += cos(14.0 * foo) * f14 - f14;
coef += cos(16.0 * foo) * f16 - f16;
coef += cos(17.0 * foo) * f17 - f17;
coef += cos(19.0 * foo) * f19 - f19;
lut[i] = coef;
}
return lut;
}
void blackman_square_interpolation(double *in, double *out, int32_t Mi, int32_t Mo, double *lut, int32_t lut_size) // Interpolation based on an estimate of the Blackman Square function, which is a Blackman function convolved with a square. It's like smoothing the result of a nearest neighbour interpolation with a Blackman FIR
{
int32_t i, j; // general purpose iterators
double pos_in; // position in the original signal
double x; // position of the iterator in the blackman_square(x) formula
double ratio; // scaling ratio (> 1.0)
double ratio_i; // ratio^-1
double coef; // Blackman square final coefficient
double pos_lut; // Index on the look-up table
int32_t pos_luti; // Integer index on the look-up table
double mod_pos; // modulo of the position on the look-up table
double y0, y1; // values of the two closest values on the LUT
double foo = (double) lut_size / 3.0;
int32_t j_start, j_stop; // boundary values for the j loop
/*
* Mi is the original signal's length
* Mo is the output signal's length
*/
ratio = (double) Mi/Mo;
ratio_i = 1.0/ratio;
for (i=0; i<Mo; i++)
{
pos_in = (double) i * ratio;
j_stop = pos_in + 1.5;
j_start = j_stop - 2;
if (j_start<0)
j_start=0;
if (j_stop >= Mi) // The boundary check is done after j_start is calculated to avoid miscalculating it
j_stop = Mi - 1;
for (j=j_start; j<=j_stop; j++)
{
x = j - pos_in + 1.5; // calculate position within the Blackman square function in the [0.0 ; 3.0] range
pos_lut = x * foo;
pos_luti = (int32_t) pos_lut;
mod_pos = fmod(pos_lut, 1.0); // modulo of the index
y0 = lut[pos_luti]; // interpolate linearly between the two closest values
y1 = lut[pos_luti + 1];
coef = y0 + mod_pos * (y1 - y0); // linear interpolation
out[i] += in[j] * coef; // convolve
}
}
}
double **anal(double *s, int32_t samplecount, int32_t samplerate, int32_t *Xsize, int32_t bands, double bpo, double pixpersec, double basefreq)
{
int32_t i, ib, Mb, Mc, Md, Fa, Fd;
double **out, *h, *freq, *t, coef, La, Ld, Li, maxfreq;
/*
s is the original signal
samplecount is the original signal's orginal length
ib is the band iterator
i is a general purpose iterator
Mb is the length of the original signal once zero-padded (always even)
Mc is the length of the filtered signal
Md is the length of the envelopes once downsampled (constant)
Fa is the index of the band's start in the frequency domain
Fd is the index of the band's end in the frequency domain
La is the log2 of the frequency of Fa
Ld is the log2 of the frequency of Fd
Li is the iterative frequency between La and Ld defined logarithmically
coef is a temporary modulation coefficient
t is temporary pointer to a new version of the signal being worked on
bands is the total count of bands
freq is the band's central frequency
maxfreq is the central frequency of the last band
*/
freq = freqarray(basefreq, bands, bpo);
if (LOGBASE==1.0)
maxfreq = bpo; // in linear mode we use bpo to store the maxfreq since we couldn't deduce maxfreq otherwise
else
maxfreq = basefreq * pow(LOGBASE, ((double) (bands-1)/ bpo));
*Xsize = samplecount * pixpersec;
if (fmod((double) samplecount * pixpersec, 1.0) != 0.0) // round-up
(*Xsize)++;
printf("Image size : %dx%d\n", *Xsize, bands);
out = malloc (bands * sizeof(double *));
clocka=gettime();
//********ZEROPADDING******** Note : Don't do it in Circular mode
if (LOGBASE==1.0)
Mb = samplecount - 1 + (int32_t) roundoff(5.0/ freq[1]-freq[0]); // linear mode
else
Mb = samplecount - 1 + (int32_t) roundoff(2.0*5.0/((freq[0] * pow(LOGBASE, -1.0/(bpo))) * (1.0 - pow(LOGBASE, -1.0 / bpo))));
if (Mb % 2 == 1) // if Mb is odd
Mb++; // make it even (for the sake of simplicity)
Mb = roundoff((double) nextsprime((int32_t) roundoff(Mb * pixpersec)) / pixpersec);
Md = roundoff(Mb * pixpersec);
s = realloc (s, Mb * sizeof(double)); // realloc to the zeropadded size
memset(&s[samplecount], 0, (Mb-samplecount) * sizeof(double)); // zero-out the padded area. Equivalent of : for (i=samplecount; i<Mb; i++) s[i] = 0;
//--------ZEROPADDING--------
fft(s, s, Mb, 0); // In-place FFT of the original zero-padded signal
for (ib=0; ib<bands; ib++)
{
//********Filtering********
Fa = roundoff(log_pos((double) (ib-1)/(double) (bands-1), basefreq, maxfreq) * Mb);
Fd = roundoff(log_pos((double) (ib+1)/(double) (bands-1), basefreq, maxfreq) * Mb);
La = log_pos_inv((double) Fa / (double) Mb, basefreq, maxfreq);
Ld = log_pos_inv((double) Fd / (double) Mb, basefreq, maxfreq);
if (Fd > Mb/2)
Fd = Mb/2; // stop reading if reaching the Nyquist frequency
if (Fa<1)
Fa=1;
Mc = (Fd-Fa)*2 + 1; // '*2' because the filtering is on both real and imaginary parts, '+1' for the DC. No Nyquist component since the signal length is necessarily odd
if (Md>Mc) // if the band is going to be too narrow
Mc = Md;
if (Md<Mc) // round the larger bands up to the next integer made of 2^n * 3^m
Mc = nextsprime(Mc);
printf("%4d/%d (FFT size: %6d) %.2f Hz - %.2f Hz\r", ib+1, bands, Mc, (double) Fa*samplerate/Mb, (double) Fd*samplerate/Mb);
out[bands-ib-1] = calloc(Mc, sizeof(double)); // allocate new band
for (i=0; i<Fd-Fa; i++)
{
Li = log_pos_inv((double) (i+Fa) / (double) Mb, basefreq, maxfreq); // calculation of the logarithmic position
Li = (Li-La)/(Ld-La);
coef = 0.5 - 0.5*cos(2.0*pi*Li); // Hann function
out[bands-ib-1][i+1] = s[i+1+Fa] * coef;
out[bands-ib-1][Mc-1-i] = s[Mb-Fa-1-i] * coef;
}
//--------Filtering--------
//********90° rotation********
h = calloc(Mc, sizeof(double)); // allocate the 90° rotated version of the band
// Rotation : Re' = Im; Im' = -Re
for (i=0; i<Fd-Fa; i++)
{
h[i+1] = out[bands-ib-1][Mc-1-i]; // Re' = Im
h[Mc-1-i] = -out[bands-ib-1][i+1]; // Im' = -Re
}
//--------90° rotation--------
//********Envelope detection********
fft(out[bands-ib-1], out[bands-ib-1], Mc, 1); // In-place IFFT of the filtered band signal
fft(h, h, Mc, 1); // In-place IFFT of the filtered band signal rotated by 90°
for (i=0; i<Mc; i++)
out[bands-ib-1][i] = sqrt(out[bands-ib-1][i]*out[bands-ib-1][i] + h[i]*h[i]); // Magnitude of the analytic signal
free(h);
//--------Envelope detection--------
//********Downsampling********
if (Mc < Md) // if the band doesn't have to be resampled
out[bands-ib-1] = realloc(out[bands-ib-1], Md * sizeof(double)); // simply ignore the end of it
if (Mc > Md) // If the band *has* to be downsampled
{
t = out[bands-ib-1];
out[bands-ib-1] = blackman_downsampling(out[bands-ib-1], Mc, Md); // Blackman downsampling
free(t);
}
//--------Downsampling--------
out[bands-ib-1] = realloc(out[bands-ib-1], *Xsize * sizeof(double)); // Tail chopping
}
printf("\n");
normi(out, *Xsize, bands, 1.0);
return out;
}
double *wsinc_max(int32_t length, double bw)
{
int32_t i;
int32_t bwl; // integer transition bandwidth
double tbw; // double transition bandwidth
double *h; // kernel
double x; // position in the antiderivate of the Blackman function of the sample we're at
double coef; // coefficient obtained from the function
tbw = bw * (double) (length-1);
bwl = roundup(tbw);
h = calloc (length, sizeof(double));
for (i=1; i<length; i++)
h[i] = 1.0;
for (i=0; i<bwl; i++)
{
x = (double) i / tbw; // position calculation between 0.0 and 1.0
coef = 0.42*x - (0.5/(2.0*pi))*sin(2.0*pi*x) + (0.08/(4.0*pi))*sin(4.0*pi*x); // antiderivative of the Blackman function
coef *= 1.0/0.42;
h[i+1] = coef;
h[length-1-i] = coef;
}
return h;
}
double *synt_sine(double **d, int32_t Xsize, int32_t bands, int32_t *samplecount, int32_t samplerate, double basefreq, double pixpersec, double bpo)
{
double *s, *freq, *filter, *sband, sine[4], rphase;
int32_t i, ib;
int32_t Fc, Bc, Mh, Mn, sbsize;
/*
d is the original image (spectrogram)
s is the output sound
sband is the band's envelope upsampled and shifted up in frequency
sbsize is the length of sband
sine is the random sine look-up table
*samplecount is the output sound's length
ib is the band iterator
i is a general purpose iterator
bands is the total count of bands
Fc is the index of the band's centre in the frequency domain on the new signal
Bc is the index of the band's centre in the frequency domain on sband (its imaginary match being sbsize-Bc)
Mh is the length of the real or imaginary part of the envelope's FFT, DC element included and Nyquist element excluded
Mn is the length of the real or imaginary part of the sound's FFT, DC element included and Nyquist element excluded
freq is the band's central frequency
rphase is the band's sine's random phase
*/
freq = freqarray(basefreq, bands, bpo);
clocka=gettime();
sbsize = nextsprime(Xsize * 2); // In Circular mode keep it to sbsize = Xsize * 2;
*samplecount = roundoff(Xsize/pixpersec);
printf("Sound duration : %.3f s\n", (double) *samplecount/samplerate);
*samplecount = roundoff(0.5*sbsize/pixpersec); // Do not change this value as it would stretch envelopes
s = calloc(*samplecount, sizeof(double)); // allocation of the sound signal
sband = malloc (sbsize * sizeof(double)); // allocation of the shifted band
Bc = roundoff(0.25 * (double) sbsize);
Mh = (sbsize + 1) >> 1;
Mn = (*samplecount + 1) >> 1;
filter = wsinc_max(Mh, 1.0/TRANSITION_BW_SYNT); // generation of the frequency-domain filter
for (ib=0; ib<bands; ib++)
{
memset(sband, 0, sbsize * sizeof(double)); // reset sband
//********Frequency shifting********
rphase = dblrand() * pi; // random phase between -pi and +pi
for (i=0; i<4; i++) // generating the random sine LUT
sine[i]=cos(i*2.0*pi*0.25 + rphase);
for (i=0; i<Xsize; i++) // envelope sampling rate * 2 and frequency shifting by 0.25
{
if ((i & 1) == 0)
{
sband[i<<1] = d[bands-ib-1][i] * sine[0];
sband[(i<<1) + 1] = d[bands-ib-1][i] * sine[1];
}
else
{
sband[i<<1] = d[bands-ib-1][i] * sine[2];
sband[(i<<1) + 1] = d[bands-ib-1][i] * sine[3];
}
}
//--------Frequency shifting--------
fft(sband, sband, sbsize, 0); // FFT of the envelope
Fc = roundoff(freq[ib] * *samplecount); // band's centre index (envelope's DC element)
printf("%4d/%d %.2f Hz\r", ib+1, bands, (double) Fc*samplerate / *samplecount);
//********Write FFT********
for (i=1; i<Mh; i++)
{
if (Fc-Bc+i > 0 && Fc-Bc+i < Mn) // if we're between frequencies 0 and 0.5 of the new signal and that we're not at Fc
{
s[i+Fc-Bc] += sband[i] * filter[i]; // Real part
s[*samplecount-(i+Fc-Bc)] += sband[sbsize-i] * filter[i]; // Imaginary part
}
}
//--------Write FFT--------
}
printf("\n");
fft(s, s, *samplecount, 1); // IFFT of the final sound
*samplecount = roundoff(Xsize/pixpersec); // chopping tails by ignoring them
normi(&s, *samplecount, 1, 1.0);
return s;
}
double *synt_noise(double **d, int32_t Xsize, int32_t bands, int32_t *samplecount, int32_t samplerate, double basefreq, double pixpersec, double bpo)
{
int32_t i; // general purpose iterator
int32_t ib; // bands iterator
int32_t il; // loop iterator
double *s; // final signal
double coef;
double *noise; // filtered looped noise
double loop_size_sec=LOOP_SIZE_SEC; // size of the filter bank loop, in seconds. Later to be taken from user input
int32_t loop_size; // size of the filter bank loop, in samples. Deduced from loop_size_sec
int32_t loop_size_min; // minimum required size for the filter bank loop, in samples. Calculated from the longest windowed sinc's length
double *pink_noise; // original pink noise (in the frequency domain)
double mag, phase; // parameters for the creation of pink_noise's samples
double *envelope; // interpolated envelope
double *lut; // Blackman Sqaure look-up table
double *freq; // frequency look-up table
double maxfreq; // central frequency of the last band
int32_t Fa; // Fa is the index of the band's start in the frequency domain
int32_t Fd; // Fd is the index of the band's end in the frequency domain
double La; // La is the log2 of the frequency of Fa
double Ld; // Ld is the log2 of the frequency of Fd
double Li; // Li is the iterative frequency between La and Ld defined logarithmically
freq = freqarray(basefreq, bands, bpo);
if (LOGBASE==1.0)
maxfreq = bpo; // in linear mode we use bpo to store the maxfreq since we couldn't deduce maxfreq otherwise
else
maxfreq = basefreq * pow(LOGBASE, ((double) (bands-1)/ bpo));
clocka=gettime();
*samplecount = roundoff(Xsize/pixpersec); // calculation of the length of the final signal
printf("Sound duration : %.3f s\n", (double) *samplecount/samplerate);
s = calloc (*samplecount, sizeof(double)); // allocation of the final signal
envelope = calloc (*samplecount, sizeof(double)); // allocation of the interpolated envelope
//********Loop size calculation********
loop_size = loop_size_sec * samplerate;
if (LOGBASE==1.0)
loop_size_min = (int32_t) roundoff(4.0*5.0/ freq[1]-freq[0]); // linear mode
else
loop_size_min = (int32_t) roundoff(2.0*5.0/((freq[0] * pow(2.0, -1.0/(bpo))) * (1.0 - pow(2.0, -1.0 / bpo)))); // this is the estimate of how many samples the longest FIR will take up in the time domain
if (loop_size_min > loop_size)
loop_size = loop_size_min;
loop_size = nextsprime(loop_size); // enlarge the loop_size to the next multiple of short primes in order to make IFFTs faster
//--------Loop size calculation--------
//********Pink noise generation********
pink_noise = calloc (loop_size, sizeof(double));
for (i=1; i<(loop_size+1)>>1; i++)
{
mag = pow((double) i, 0.5 - 0.5*LOGBASE); // FIXME something's not necessarily right with that formula
phase = dblrand() * pi; // random phase between -pi and +pi
pink_noise[i]= mag * cos(phase); // real part
pink_noise[loop_size-i]= mag * sin(phase); // imaginary part
}
//--------Pink noise generation--------
noise = malloc(loop_size * sizeof(double)); // allocate noise
lut = bmsq_lut(BMSQ_LUT_SIZE); // Blackman Square look-up table initalisation
for (ib=0; ib<bands; ib++)
{
printf("%4d/%d\r", ib+1, bands);
memset(noise, 0, loop_size * sizeof(double)); // reset filtered noise
//********Filtering********
Fa = roundoff(log_pos((double) (ib-1)/(double) (bands-1), basefreq, maxfreq) * loop_size);
Fd = roundoff(log_pos((double) (ib+1)/(double) (bands-1), basefreq, maxfreq) * loop_size);
La = log_pos_inv((double) Fa / (double) loop_size, basefreq, maxfreq);
Ld = log_pos_inv((double) Fd / (double) loop_size, basefreq, maxfreq);
if (Fd > loop_size/2)
Fd = loop_size/2; // stop reading if reaching the Nyquist frequency
if (Fa<1)
Fa=1;
printf("%4d/%d %.2f Hz - %.2f Hz\r", ib+1, bands, (double) Fa*samplerate/loop_size, (double) Fd*samplerate/loop_size);
for (i=Fa; i<Fd; i++)
{
Li = log_pos_inv((double) i / (double) loop_size, basefreq, maxfreq); // calculation of the logarithmic position
Li = (Li-La)/(Ld-La);
coef = 0.5 - 0.5*cos(2.0*pi*Li); // Hann function
noise[i+1] = pink_noise[i+1] * coef;
noise[loop_size-1-i] = pink_noise[loop_size-1-i] * coef;
}
//--------Filtering--------
fft(noise, noise, loop_size, 1); // IFFT of the filtered noise
memset(envelope, 0, *samplecount * sizeof(double)); // blank the envelope
blackman_square_interpolation(d[bands-ib-1], envelope, Xsize, *samplecount, lut, BMSQ_LUT_SIZE); // interpolation of the envelope
il = 0;
for (i=0; i<*samplecount; i++)
{
s[i] += envelope[i] * noise[il]; // modulation
il++; // increment loop iterator
if (il==loop_size) // if the array iterator has reached the end of the array, it's reset
il=0;
}
}
printf("\n");
normi(&s, *samplecount, 1, 1.0);
return s;
}
void brightness_control(double **image, int32_t width, int32_t height, double ratio) // Almost like a gamma correction, but uses a different formula
{
// Actually this is based on the idea of converting values to decibels, for example, 0.01 becomes -40 dB, dividing them by ratio, so if ratio is 2 then -40 dB/2 = -20 dB, and then turning it back to regular values, so -20 dB becomes 0.1
// If ratio==2 then this function is equivalent to a square root
// 1/ratio is used for the forward transformation
// ratio is used for the reverse transformation
int32_t ix, iy;
for (iy=0; iy<width; iy++)
for (ix=0; ix<height; ix++)
image[iy][ix] = pow(image[iy][ix], ratio);
}