//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Fri Aug 18 21:51:09 PDT 2000
// Last Modified: Fri Jan 26 05:47:51 PST 2001
// Last Modified: Sun Feb 4 12:44:29 PST 2001
// Filename: ...sig/doc/examples/all/pitchdelac/pitchdelac.cpp
// Syntax: C++; sig
//
// Description: This program is an implementation of a pitch
// detection algorithm by Patrico de la Cuadra
// <pdelac@ccrma.stanford.edu> and Craig Sapp
// <craig@ccrma.stanford.edu>
//
#include "sigAudio.h"
// function declarations:
void checkOptions(Options& opts);
void example(void);
void usage(const char* command);
void analyze(Array<double>& result, Block<double>& buffer, int harmonics);
void fill(Block<double>& buffer, SoundFileIn& insound);
void printArray(int frame, const Array<double>& anArray);
void makeFrameWindow(Block<double>& signalwindow);
void makeSpectrumWindow(Array<double>& window);
void KaiserWindow(double* window, int size, double alpha);
double BesselI0(double x);
int findMax(Array<double>& input);
// global interface variables
double srate = 0; // the sampling rate of the input soundfile
int framesize = 0; // the number of samples in a primary frame
int harmonics = 0; // the number of harmonics to analyze
int quietQ = 0; // suppress status printing while program is running
int pad = 4; // zero padding factor
double alpha = 4.0; // used with the -a option
double scale = 1.0; // used with the -s option
double beta = 6.0; // used with the -b option
double gamme = 6.0; // used with the -g option
int maxQ = 0; // used with the -m option
int mine = 1; // used with the --min option
int width = 5; // used with the -w option
int swindowQ = 0; // used with the --spectrumwindow option
Block<double> signalwindow;
Array<double> spectrumwindow;
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[]) {
Options options(argc, argv);
checkOptions(options);
SoundFileIn insound(options.getArgument(1));
srate = insound.getSrate();
double frames = (double)insound.getSamples() / framesize;
int framecount;
if ((int)frames - frames == 0.0) {
framecount = (int)frames;
} else {
framecount = (int)frames + 1;
}
signalwindow.setSize(framesize);
makeFrameWindow(signalwindow);
spectrumwindow.setSize(framesize*pad);
makeSpectrumWindow(spectrumwindow);
Block<double> buffer(framesize);
Array<double> result;
result.setSize(framesize/harmonics + 1);
int max;
for (int i=0; i<framecount; i++) {
if (!quietQ) {
cout << "Processing frame: " << i << endl;
}
fill(buffer, insound);
buffer *= signalwindow;
analyze(result, buffer, harmonics);
if (maxQ) {
max = findMax(result);
cout << max << "\n";
} else {
printArray(i, result);
}
}
return 0;
}
////////////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// findMax -- find the index of the maximum value in an array.
//
int findMax(Array& input) {
int max = mine;
int i;
for (i=mine+1; i<input.getSize(); i++) {
if (input[i] > input[max]) {
max = i;
}
}
return max;
}
//////////////////////////////
//
// fill -- fill the signal buffers with data from the input soundfile.
//
void fill(Block& buffer, SoundFileIn& insound) {
for (int i=0; i<buffer.getSize(); i++) {
insound.action();
buffer[i] = insound.output(0);
}
}
//////////////////////////////
//
// analyze -- Do frequency transform of the sound data and
// generate a pitch array by multiplying the transforms
// together.
//
void analyze(Array& result, Block& signal, int harmonics) {
int i, j;
int framesize = signal.getSize();
Block<ComplexD> transform(framesize*pad);
Block<ComplexD> complexsignal(framesize*pad);
for (i=0; i<framesize; i++) {
complexsignal[i].re() = signal[i];
complexsignal[i].im() = 0.0;
}
for (i=framesize; i<framesize*pad; i++) {
complexsignal[i].re() = 0.0;
complexsignal[i].im() = 0.0;
}
FFT(transform, complexsignal);
Array<double> absfreq(transform.getSize());
for (i=0; i<framesize*pad; i++) {
absfreq[i] = transform[i].abs();
absfreq[i] -= absfreq[i] * spectrumwindow[i];
}
result.setSize(framesize/harmonics + 1);
// multiply adjacent harmonic spaces:
for (i=0; i<result.getSize(); i++) {
// result[i] = absfreq[i];
result[i] = log10(absfreq[i]);
for (j=2; j<=harmonics; j++) {
result[i] += log10(absfreq[j*i]);
// result[i] *= absfreq[j*i];
}
}
}
//////////////////////////////
//
// printArray -- print a pitch analysis array.
//
void printArray(int frame, const Array& anArray) {
if (!quietQ) {
cout << "\n# FRAME = " << frame << "\n";
}
for (int i=0; i<100; i++) {
cout << "\t" << anArray[i];
}
cout << "\n";
}
//////////////////////////////
//
// checkOptions -- make sure that all program options are ok and
// setup interface variables.
//
void checkOptions(Options& opts) {
opts.define("a|alpha=d:4.0", "alpha parameter for Kaiser window");
opts.define("b|beta=d:6.0", "alpha parameter for 2nd Kaiser window");
opts.define("g|gamma=d:6.0", "alpha parameter for 3rd Kaiser window");
opts.define("s|scale=d:1.0", "scaling parameter for 2nd Kaiser window");
opts.define("h|harmonics=i:5", "Num of harmonics to consider");
opts.define("f|framesize=i:1024", "Num of samples in fundamental frame");
opts.define("p|pad=i:4", "Zero-padding factor");
opts.define("m|max=b", "Maximum likely frequency bin");
opts.define("min=i:1", "minimum frequency bin to search");
opts.define("w|width=i:5", "low frequency filter size in power of two");
opts.define("spectrumwindow=b", "display the spectrum window and exit");
opts.define("q|quiet=b", "Suppress printing of status information");
opts.define("help=b");
opts.define("author=b");
opts.define("version=b");
opts.define("example=b");
opts.process();
if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
}
if (opts.getBoolean("example")) {
example();
exit(0);
}
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, August 2000" << endl;
exit(0);
}
if (opts.getBoolean("version")) {
cout << "pitchdelac version: Fri Aug 18 22:37:39 PDT 2000" << endl;
cout << SIG_VERSION << endl;
exit(0);
}
if (opts.getArgCount() != 1) {
cout << "Error: must specify one input soundfile" << endl;
usage(opts.getCommand());
exit(1);
}
harmonics = opts.getInt("harmonics");
framesize = opts.getInt("framesize");
if (framesize < 64) {
cout << "Error: frame size is too small: " << framesize << endl;
exit(1);
}
if (framesize > 10000) {
cout << "Error: frame size is too large: " << framesize << endl;
exit(1);
}
if (harmonics < 1) {
cout << "Error: harmonic count is too small: " << harmonics << endl;
exit(1);
}
if (harmonics > 30) {
cout << "Error: harmonic count is too large: " << harmonics << endl;
exit(1);
}
if (framesize/harmonics < 10) {
cout << "Error: too many harmonics for the given framesize" << endl;
exit(1);
}
quietQ = opts.getBoolean("quiet");
pad = opts.getInteger("pad");
if (pad < 1) {
pad = 1;
}
alpha = opts.getDouble("alpha");
scale = opts.getDouble("scale");
beta = opts.getDouble("beta");
maxQ = opts.getBoolean("max");
mine = opts.getInteger("min");
width = opts.getInteger("width");
gamme = opts.getDouble("gamma");
swindowQ = opts.getBoolean("spectrumwindow");
}
//////////////////////////////
//
// example -- example usage of the program.
//
void example(void) {
cout <<
" To be written. \n"
<< endl;
}
//////////////////////////////
//
// usage -- explaination about the program and its options.
//
void usage(const char* command) {
cout << "\nUsage: " << command
<< " [-f framesize][-h harmonics] soundfile\n";
cout << " Determines pitch in a soundfile.\n";
cout << " Options are:\n";
cout << " -f framesize = size of primary analysis frame\n";
cout << " -h harmonics = number of analysis frames per block\n";
cout << " -o frameoverlap = option not active\n";
cout << " --options a list of all options.\n";
cout << endl;
}
//////////////////////////////
//
// makeFrameWindow --
//
void makeFrameWindow(Block& signalwindow) {
KaiserWindow(signalwindow.getBase(), signalwindow.getSize(), alpha);
}
//////////////////////////////
//
// makeSpectrumWindow --
//
void makeSpectrumWindow(Array& window) {
int sws = (1 << width) * pad;
int lws = window.getSize() - sws;
Array<double> smallwindow(sws);
Array<double> largewindow(lws);
KaiserWindow(smallwindow.getBase(), sws, beta);
KaiserWindow(largewindow.getBase(), lws, gamme);
int i;
//for (i=0; i<sws/2; i++) {
// smallwindow[i] = (1 - (double)i/(sws/2));
// smallwindow[i+sws/2] = smallwindow[i];
//}
window.zero();
for (i=0; i<sws/2; i++) {
window[i] = smallwindow[i+sws/2] * scale;
window[window.getSize() - i - 1] = smallwindow[sws/2-i-1] * scale;
}
// largewindow.zero();
for (i=0; i<lws; i++) {
window[i+sws/2] = largewindow[i];
}
if (swindowQ) {
for (i=0; i<window.getSize(); i++) {
cout << window[i] << "\n";
}
exit(0);
}
}
//////////////////////////////
//
// KaiserWindow -- Kaiser Bessel Window
// fills the input window array with size samples of the
// Kaiser window with the given tuning parameter alpha.
//
#ifndef PI
#define PI 3.14159265358979323846264338328
#endif
void KaiserWindow(double* window, int size, double alpha) {
double sumvalue = 0.0;
int i;
for (i=0; i<size/2; i++) {
sumvalue += BesselI0(PI * alpha * sqrt(1.0 - pow(4.0*i/size - 1.0, 2)));
window[i] = sumvalue;
}
// need to add one more value to the nomalization factor at size/2:
sumvalue += BesselI0(PI * alpha * sqrt(1.0 - pow(4.0*(size/2)/size-1.0, 2)));
// normalize the window and fill in the righthand side of the window:
for (i=0; i<size/2; i++) {
window[i] = window[i]/sumvalue;
window[size-1-i] = window[i];
}
}
//////////////////////////////
//
// BesselI0 -- Regular Modified Cylindrical Bessel Function (Bessel I).
//
double BesselI0(double x) {
double denominator;
double numerator;
double z;
if (x == 0.0) {
return 1.0;
} else {
z = x * x;
numerator = (z* (z* (z* (z* (z* (z* (z* (z* (z* (z* (z* (z* (z*
(z* 0.210580722890567e-22 + 0.380715242345326e-19 ) +
0.479440257548300e-16) + 0.435125971262668e-13 ) +
0.300931127112960e-10) + 0.160224679395361e-7 ) +
0.654858370096785e-5) + 0.202591084143397e-2 ) +
0.463076284721000e0) + 0.754337328948189e2 ) +
0.830792541809429e4) + 0.571661130563785e6 ) +
0.216415572361227e8) + 0.356644482244025e9 ) +
0.144048298227235e10);
denominator = (z*(z*(z-0.307646912682801e4)+
0.347626332405882e7)-0.144048298227235e10);
}
return -numerator/denominator;
}
// md5sum: 5df6428f17158020df5ce8647e5f81f0 pitchdelac.cpp [20050403]