00001 #include "spectrogram.hpp"
00002
00003 #include <cmath>
00004 #include <cstring>
00005 #include <cassert>
00006 #include <QVector>
00007 #include <QTextStream>
00008 #include <QRgb>
00009
00010 #include <vector>
00011 #include <algorithm>
00012 #include <limits>
00013 #include <iostream>
00014 #include "samplerate.h"
00015
00016 namespace
00017 {
00018 float log10scale(float val)
00019 {
00020 assert(val >= 0 && val <= 1);
00021 return std::log10(1+9*val);
00022 }
00023
00024 float log10scale_inv(float val)
00025 {
00026 assert(val >= 0 && val <= 1);
00027 return (std::pow(10, val)-1)/9;
00028 }
00029
00030
00031 double cent2freq(double cents)
00032 {
00033 return std::pow(2, cents/1200);
00034 }
00035 double freq2cent(double freq)
00036 {
00037 return std::log(freq)/std::log(2)*1200;
00038 }
00039 double cent2oct(double cents)
00040 {
00041 return cents/1200;
00042 }
00043 double oct2cent(double oct)
00044 {
00045 return oct*1200;
00046 }
00047
00048 void shift90deg(Complex& x)
00049 {
00050 x = std::conj(Complex(x.imag(), x.real()));
00051 }
00052
00054 real_vec resample(const real_vec& in, size_t len)
00055 {
00056 assert(len > 0);
00057
00058 if (in.size() == len)
00059 return in;
00060
00061 const double ratio = (double)len/in.size();
00062 if (ratio >= 256)
00063 return resample(resample(in, in.size()*50), len);
00064 else if (ratio <= 1.0/256)
00065 return resample(resample(in, in.size()/50), len);
00066
00067 real_vec out(len);
00068
00069 SRC_DATA parms = {const_cast<float*>(&in[0]),
00070 &out[0], in.size(), out.size(), 0,0,0, ratio};
00071 src_simple(&parms, SRC_SINC_FASTEST, 1);
00072
00073 return out;
00074 }
00075
00077 real_vec get_envelope(complex_vec& band)
00078 {
00079 assert(band.size() > 1);
00080
00081
00082 complex_vec shifted(band);
00083 std::for_each(shifted.begin(), shifted.end(), shift90deg);
00084
00085 real_vec envelope = padded_IFFT(band);
00086 real_vec shifted_signal = padded_IFFT(shifted);
00087
00088 for (size_t i = 0; i < envelope.size(); ++i)
00089 envelope[i] = std::sqrt(envelope[i]*envelope[i] +
00090 shifted_signal[i]*shifted_signal[i]);
00091
00092 return envelope;
00093 }
00094
00095 double blackman_window(double x)
00096 {
00097 assert(x >= 0 && x <= 1);
00098 return std::max(0.42 - 0.5*cos(2*PI*x) + 0.08*cos(4*PI*x), 0.0);
00099 }
00100
00101 double hann_window(double x)
00102 {
00103 assert(x >= 0 && x <= 1);
00104 return 0.5*(1-std::cos(x*2*PI));
00105 }
00106
00107 double triangular_window(double x)
00108 {
00109 assert(x >= 0 && x <= 1);
00110 return 1-std::abs(2*(x-0.5));
00111 }
00112
00113 double window_coef(double x, Window window)
00114 {
00115 assert(x >= 0 && x <= 1);
00116 if (window == WINDOW_RECTANGULAR)
00117 return 1.0;
00118 switch (window)
00119 {
00120 case WINDOW_HANN:
00121 return hann_window(x);
00122 case WINDOW_BLACKMAN:
00123 return blackman_window(x);
00124 case WINDOW_TRIANGULAR:
00125 return triangular_window(x);
00126 default:
00127 assert(false);
00128 }
00129 }
00130
00131 float calc_intensity(float val, AxisScale intensity_axis)
00132 {
00133 assert(val >= 0 && val <= 1);
00134 switch (intensity_axis)
00135 {
00136 case SCALE_LOGARITHMIC:
00137 return log10scale(val);
00138 case SCALE_LINEAR:
00139 return val;
00140 default:
00141 assert(false);
00142 }
00143 }
00144
00145 float calc_intensity_inv(float val, AxisScale intensity_axis)
00146 {
00147 assert(val >= 0 && val <= 1);
00148 switch (intensity_axis)
00149 {
00150 case SCALE_LOGARITHMIC:
00151 return log10scale_inv(val);
00152 case SCALE_LINEAR:
00153 return val;
00154 default:
00155 assert(false);
00156 }
00157 }
00158
00159
00160 void normalize_image(std::vector<real_vec>& data)
00161 {
00162 float max = 0.0f;
00163 for (std::vector<real_vec>::iterator it=data.begin();
00164 it!=data.end(); ++it)
00165 max = std::max(*std::max_element(it->begin(), it->end()), max);
00166 if (max == 0.0f)
00167 return;
00168 for (std::vector<real_vec>::iterator it=data.begin();
00169 it!=data.end(); ++it)
00170 for (real_vec::iterator i = it->begin(); i != it->end(); ++i)
00171 *i = std::abs(*i)/max;
00172 }
00173
00174
00175 void normalize_signal(real_vec& vector)
00176 {
00177 float max = 0;
00178 for (real_vec::iterator it = vector.begin(); it != vector.end(); ++it)
00179 max = std::max(max, std::abs(*it));
00180
00181 assert(max > 0);
00182 for (real_vec::iterator it = vector.begin(); it != vector.end(); ++it)
00183 *it /= max;
00184 }
00185
00186
00187 double random_double()
00188 {
00189 return ((double)rand()/(double)RAND_MAX);
00190 }
00191
00192 float brightness_correction(float intensity, BrightCorrection correction)
00193 {
00194 switch (correction)
00195 {
00196 case BRIGHT_NONE:
00197 return intensity;
00198 case BRIGHT_SQRT:
00199 return std::sqrt(intensity);
00200 }
00201 assert(false);
00202 }
00203
00205
00206 complex_vec get_pink_noise(size_t size)
00207 {
00208 complex_vec res;
00209 for (size_t i = 0; i < (size+1)/2; ++i)
00210 {
00211 const float mag = std::pow((float) i, -0.5f);
00212 const double phase = (2*random_double()-1) * PI;
00213 res.push_back(Complex(mag*std::cos(phase), mag*std::sin(phase)));
00214 }
00215 return res;
00216 }
00217 }
00218
00219 Spectrogram::Spectrogram(QObject* parent)
00220 : QObject(parent)
00221 , bandwidth(100)
00222 , basefreq(55)
00223 , maxfreq(22050)
00224 , overlap(0.8)
00225 , pixpersec(100)
00226 , window(WINDOW_HANN)
00227 , intensity_axis(SCALE_LOGARITHMIC)
00228 , frequency_axis(SCALE_LOGARITHMIC)
00229 , cancelled_(false)
00230 {
00231 }
00232
00233 QImage Spectrogram::to_image(real_vec& signal, int samplerate) const
00234 {
00235 emit status("Transforming input");
00236 emit progress(0);
00237 const complex_vec spectrum = padded_FFT(signal);
00238
00239 const size_t width = (spectrum.size()-1)*2*pixpersec/samplerate;
00240
00241
00242 const double filterscale = ((double)spectrum.size()*2)/samplerate;
00243
00244
00245 std::auto_ptr<Filterbank> filterbank = Filterbank::get_filterbank(
00246 frequency_axis, filterscale, basefreq, bandwidth, overlap);
00247 const int bands = filterbank->num_bands_est(maxfreq);
00248 const int top_index = maxfreq*filterscale;
00249
00250 assert(top_index <= (int)spectrum.size());
00251
00252 std::vector<real_vec> image_data;
00253 for (size_t bandidx = 0;; ++bandidx)
00254 {
00255 if (cancelled())
00256 return QImage();
00257 band_progress(bandidx, bands, 5, 93);
00258
00259
00260 intpair range = filterbank->get_band(bandidx);
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271 complex_vec filterband(range.second - range.first);
00272 std::copy(spectrum.begin()+range.first,
00273 spectrum.begin()+std::min(range.second, top_index),
00274 filterband.begin());
00275
00276 if (range.first > top_index)
00277 break;
00278 if (range.second > top_index)
00279 std::fill(filterband.begin()+top_index-range.first,
00280 filterband.end(), Complex(0,0));
00281
00282
00283 apply_window(filterband, range.first, filterscale);
00284
00285
00286 const real_vec envelope = resample(get_envelope(filterband), width);
00287 image_data.push_back(envelope);
00288 }
00289
00290 normalize_image(image_data);
00291
00292 emit progress(99);
00293 return make_image(image_data);
00294 }
00295
00297 QImage Spectrogram::make_image(const std::vector<real_vec>& data) const
00298 {
00299 emit status("Generating image");
00300 const size_t height = data.size();
00301 const size_t width = data[0].size();
00302 std::cout << "image size: " << width <<" x "<<height<<"\n";
00303 QImage out = palette.make_canvas(width, height);
00304 for (size_t y = 0; y < height; ++y)
00305 {
00306 assert(data[y].size() == width);
00307 for (size_t x = 0; x < width; ++x)
00308 {
00309 float intensity = calc_intensity(data[y][x], intensity_axis);
00310 intensity = brightness_correction(intensity, correction);
00311 out.setPixel(x, (height-1-y), palette.get_color(intensity));
00312 }
00313 }
00314 out.setText("Spectrogram", serialized());
00315 emit progress(100);
00316 emit status("Displaying image");
00317 return out;
00318 }
00319
00320 void Spectrogram::apply_window(complex_vec& chunk, int lowidx, double filterscale) const
00321 {
00322 const int highidx = lowidx+chunk.size();
00323 if (frequency_axis == SCALE_LINEAR)
00324 for (size_t i = 0; i < chunk.size(); ++i)
00325 chunk[i] *= window_coef((double)i/(chunk.size()-1), window);
00326 else
00327 {
00328 const double rloglow = freq2cent(lowidx/filterscale);
00329 const double rloghigh = freq2cent((highidx-1)/filterscale);
00330 for (size_t i = 0; i < chunk.size(); ++i)
00331 {
00332 const double logidx = freq2cent((lowidx+i)/filterscale);
00333 const double winidx = (logidx - rloglow)/(rloghigh - rloglow);
00334 chunk[i] *= window_coef(winidx, window);
00335 }
00336 }
00337 }
00338
00339 real_vec Spectrogram::synthetize(const QImage& image, int samplerate,
00340 SynthesisType type) const
00341 {
00342 switch (type)
00343 {
00344 case SYNTHESIS_SINE:
00345 return sine_synthesis(image, samplerate);
00346 case SYNTHESIS_NOISE:
00347 return noise_synthesis(image, samplerate);
00348 }
00349 assert(false);
00350 }
00351
00352 real_vec Spectrogram::sine_synthesis(const QImage& image, int samplerate) const
00353 {
00354 const size_t samples = image.width()*samplerate/pixpersec;
00355 complex_vec spectrum(samples/2+1);
00356
00357 const double filterscale = ((double)spectrum.size()*2)/samplerate;
00358
00359 std::auto_ptr<Filterbank> filterbank = Filterbank::get_filterbank(
00360 frequency_axis, filterscale, basefreq, bandwidth, overlap);
00361
00362 for (int bandidx = 0; bandidx < image.height(); ++bandidx)
00363 {
00364 if (cancelled())
00365 return real_vec();
00366 band_progress(bandidx, image.height()-1);
00367
00368 real_vec envelope = envelope_from_spectrogram(image, bandidx);
00369
00370
00371 const double phase = (2*random_double()-1) * PI;
00372
00373 real_vec bandsignal(envelope.size()*2);
00374 for (int j = 0; j < 4; ++j)
00375 {
00376 const double sine = std::cos(j*PI/2 + phase);
00377 for (size_t i = j; i < bandsignal.size(); i += 4)
00378 bandsignal[i] = envelope[i/2] * sine;
00379 }
00380 complex_vec filterband = padded_FFT(bandsignal);
00381
00382 for (size_t i = 0; i < filterband.size(); ++i)
00383 {
00384 const double x = (double)i/(filterband.size()-1);
00385
00386 filterband[i] *= x - ((0.5/(2.0*PI))*sin(2.0*PI*x) +
00387 (0.08/(4.0*PI))*sin(4.0*PI*x)/0.42);
00388 }
00389
00390
00391
00392
00393 const size_t center = filterbank->get_center(bandidx);
00394 const size_t offset = std::max((size_t)0, center - filterband.size()/2);
00395
00396 for (size_t i = 0; i < filterband.size(); ++i)
00397 if (offset+i > 0 && offset+i < spectrum.size())
00398 spectrum[offset+i] += filterband[i];
00399 }
00400
00401 real_vec out = padded_IFFT(spectrum);
00402
00403 normalize_signal(out);
00404 return out;
00405 }
00406
00407 real_vec Spectrogram::noise_synthesis(const QImage& image, int samplerate) const
00408 {
00409 size_t samples = image.width()*samplerate/pixpersec;
00410
00411 complex_vec noise = get_pink_noise(samplerate*10);
00412
00413 const double filterscale = ((double)noise.size()*2)/samplerate;
00414 std::auto_ptr<Filterbank> filterbank = Filterbank::get_filterbank(
00415 frequency_axis, filterscale, basefreq, bandwidth, overlap);
00416
00417 const int top_index = maxfreq*filterscale;
00418
00419 real_vec out(samples);
00420
00421 for (int bandidx = 0; bandidx < image.height(); ++bandidx)
00422 {
00423 if (cancelled())
00424 return real_vec();
00425 band_progress(bandidx, image.height()-1);
00426
00427
00428 intpair range = filterbank->get_band(bandidx);
00429
00430
00431
00432 complex_vec filtered_noise(noise.size());
00433 std::copy(noise.begin()+range.first,
00434 noise.begin()+std::min(range.second, top_index),
00435 filtered_noise.begin()+range.first);
00436
00437
00438
00439
00440 real_vec noise_mod = padded_IFFT(filtered_noise);
00441
00442 real_vec envelope =
00443 resample(envelope_from_spectrogram(image, bandidx), samples);
00444
00445 for (size_t i = 0; i < samples; ++i)
00446 out[i] += envelope[i] * noise_mod[i % noise_mod.size()];
00447 }
00448 normalize_signal(out);
00449 return out;
00450 }
00451
00452 void Spectrogram::band_progress(int x, int of, int from, int to) const
00453 {
00454 QString bandstatus;
00455 bandstatus.sprintf("Processing band %i of %i", x, of);
00456
00457 emit status(bandstatus);
00458 emit progress(to*x/of+from);
00459 }
00460
00461 void Spectrogram::cancel()
00462 {
00463 cancelled_ = true;
00464 emit status("Cancelling...");
00465
00466 }
00467
00468 bool Spectrogram::cancelled() const
00469 {
00470 bool was = cancelled_;
00471 cancelled_ = false;
00472 return was;
00473 }
00474
00475
00476 real_vec Spectrogram::envelope_from_spectrogram(const QImage& image, int row) const
00477 {
00478 real_vec envelope(image.width());
00479 for (int x = 0; x < image.width(); ++x)
00480 envelope[x] = calc_intensity_inv(palette.get_intensity(
00481 image.pixel(x, image.height()-row-1)), intensity_axis);
00482 return envelope;
00483 }
00484
00485 void Spectrogram::deserialize(const QString& text)
00486 {
00487 QStringList tokens = text.split(delimiter);
00488 bandwidth = tokens[1].toDouble();
00489 basefreq = tokens[2].toDouble();
00490 maxfreq = tokens[3].toDouble();
00491 overlap = tokens[4].toDouble()/100.0;
00492 pixpersec = tokens[5].toDouble();
00493 window = (Window)tokens[6].toInt();
00494 intensity_axis = (AxisScale)tokens[7].toInt();
00495 frequency_axis = (AxisScale)tokens[8].toInt();
00496 }
00497
00498 QString Spectrogram::serialized() const
00499 {
00500 QString out;
00501 QTextStream desc(&out);
00502 desc.setRealNumberPrecision(4);
00503 desc.setRealNumberNotation(QTextStream::FixedNotation);
00504 desc << "Spectrogram:" << delimiter
00505 << bandwidth << delimiter
00506 << basefreq << delimiter
00507 << maxfreq << delimiter
00508 << overlap*100 << delimiter
00509 << pixpersec << delimiter
00510 << (int)window << delimiter
00511 << (int)intensity_axis << delimiter
00512 << (int)frequency_axis << delimiter
00513 ;
00514
00515 return out;
00516 }
00517
00518 Palette::Palette(const QImage& img)
00519 {
00520 assert(!img.isNull());
00521 for (int x = 0; x < img.width(); ++x)
00522 colors_.append(img.pixel(x, 0));
00523 }
00524
00525 Palette::Palette()
00526 {
00527 QVector<QRgb> colors;
00528 for (int i = 0; i < 256; ++i)
00529 colors.append(qRgb(i, i, i));
00530 colors_ = colors;
00531 }
00532
00533 int Palette::get_color(float val) const
00534 {
00535 assert(val >= 0 && val <= 1);
00536 if (indexable())
00537
00538 return (colors_.size()-1)*val;
00539 else
00540
00541 return colors_[(colors_.size()-1)*val];
00542 }
00543
00544 bool Palette::has_color(QRgb color) const
00545 {
00546 return colors_.indexOf(color) != -1;
00547 }
00548
00549
00550 float Palette::get_intensity(QRgb color) const
00551 {
00552 int index = colors_.indexOf(color);
00553 if (index == -1)
00554 return 0;
00555 return (float)index/(colors_.size()-1);
00556 }
00557
00558 QImage Palette::make_canvas(int width, int height) const
00559 {
00560 if (indexable())
00561 {
00562 QImage out(width, height, QImage::Format_Indexed8);
00563 out.setColorTable(colors_);
00564 out.fill(0);
00565 return out;
00566 }
00567 else
00568 {
00569 QImage out(width, height, QImage::Format_RGB32);
00570 out.fill(colors_[0]);
00571 return out;
00572 }
00573 }
00574
00575 bool Palette::indexable() const
00576 {
00577 return colors_.size() <= 256;
00578 }
00579
00580 QPixmap Palette::preview(int width, int height) const
00581 {
00582 QImage out = make_canvas(width, height);
00583 for (int x = 0; x < width; ++x)
00584 out.setPixel(x, 0, get_color((double)x/(width-1)));
00585 int bytes = out.bytesPerLine();
00586 for (int y = 1; y < height; ++y)
00587 std::memcpy(out.scanLine(y), out.scanLine(0), bytes);
00588 return QPixmap::fromImage(out);
00589 }
00590
00591 int Palette::numColors() const
00592 {
00593 return colors_.size();
00594 }
00595
00596 Filterbank::Filterbank(double scale)
00597 : scale_(scale)
00598 {
00599 }
00600
00601 Filterbank::~Filterbank()
00602 {
00603 }
00604
00605 LinearFilterbank::LinearFilterbank(double scale, double base,
00606 double hzbandwidth, double overlap)
00607 : Filterbank(scale)
00608 , bandwidth_(hzbandwidth*scale)
00609 , startidx_(std::max(scale_*base-bandwidth_/2, 0.0))
00610 , step_((1-overlap)*bandwidth_)
00611 {
00612
00613
00614 assert(step_ > 0);
00615 }
00616
00617 int LinearFilterbank::num_bands_est(double maxfreq) const
00618 {
00619 return (maxfreq*scale_-startidx_)/step_;
00620 }
00621
00622 intpair LinearFilterbank::get_band(int i) const
00623 {
00624 intpair out;
00625 out.first = startidx_ + i*step_;
00626 out.second = out.first + bandwidth_;
00627 return out;
00628 }
00629
00630 int LinearFilterbank::get_center(int i) const
00631 {
00632 return startidx_ + i*step_ + bandwidth_/2.0;
00633 }
00634
00635 LogFilterbank::LogFilterbank(double scale, double base,
00636 double centsperband, double overlap)
00637 : Filterbank(scale)
00638 , centsperband_(centsperband)
00639 , logstart_(freq2cent(base))
00640 , logstep_((1-overlap)*centsperband_)
00641 {
00642 assert(logstep_ > 0);
00643
00644
00645 }
00646
00647 int LogFilterbank::num_bands_est(double maxfreq) const
00648 {
00649 return (freq2cent(maxfreq)-logstart_)/logstep_+4;
00650 }
00651
00652 int LogFilterbank::get_center(int i) const
00653 {
00654 const double logcenter = logstart_ + i*logstep_;
00655 return cent2freq(logcenter)*scale_;
00656 }
00657
00658 intpair LogFilterbank::get_band(int i) const
00659 {
00660 const double logcenter = logstart_ + i*logstep_;
00661 const double loglow = logcenter - centsperband_/2.0;
00662 const double loghigh = loglow + centsperband_;
00663 intpair out;
00664 out.first = cent2freq(loglow)*scale_;
00665 out.second = cent2freq(loghigh)*scale_;
00666
00667
00668 return out;
00669 }
00670
00671 std::auto_ptr<Filterbank> Filterbank::get_filterbank(AxisScale type,
00672 double scale, double base, double bandwidth, double overlap)
00673 {
00674 Filterbank* filterbank;
00675 if (type == SCALE_LINEAR)
00676 filterbank=new LinearFilterbank(scale, base, bandwidth, overlap);
00677 else
00678 filterbank=new LogFilterbank(scale, base, bandwidth, overlap);
00679 return std::auto_ptr<Filterbank>(filterbank);
00680 }