00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "el_processor.h"
00020 #include <cstdlib>
00021 #include <cstring>
00022 #include <complex>
00023 #include <cmath>
00024 #include <vector>
00025 #ifdef USE_FFTW3
00026 #include "fftw3.h"
00027 #else
00028 extern "C" {
00029 #include "dsputil.h"
00030 };
00031 typedef FFTSample FFTComplexArray[2];
00032 #endif
00033
00034
00035 #ifdef USE_FFTW3
00036 #pragma comment (lib,"libfftw3f-3.lib")
00037 #endif
00038
00039 typedef std::complex<float> cfloat;
00040
00041 const float PI = 3.141592654;
00042 const float epsilon = 0.000001;
00043
00044
00045 const float center_level = 1.0;
00046
00047
00048
00049
00050
00051
00052
00053 const float window_gain = 0.95;
00054
00055
00056 class decoder_impl {
00057 public:
00058
00059
00060 decoder_impl(unsigned blocksize=8192): N(blocksize), halfN(blocksize/2) {
00061 #ifdef USE_FFTW3
00062
00063 lt = (float*)fftwf_malloc(sizeof(float)*N);
00064 rt = (float*)fftwf_malloc(sizeof(float)*N);
00065 dst = (float*)fftwf_malloc(sizeof(float)*N);
00066 dftL = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*N);
00067 dftR = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*N);
00068 src = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*N);
00069 loadL = fftwf_plan_dft_r2c_1d(N, lt, dftL,FFTW_MEASURE);
00070 loadR = fftwf_plan_dft_r2c_1d(N, rt, dftR,FFTW_MEASURE);
00071 store = fftwf_plan_dft_c2r_1d(N, src, dst,FFTW_MEASURE);
00072 #else
00073
00074 lt = (float*)av_malloc(sizeof(FFTSample)*N);
00075 rt = (float*)av_malloc(sizeof(FFTSample)*N);
00076 dftL = (FFTComplexArray*)av_malloc(sizeof(FFTComplex)*N*2);
00077 dftR = (FFTComplexArray*)av_malloc(sizeof(FFTComplex)*N*2);
00078 src = (FFTComplexArray*)av_malloc(sizeof(FFTComplex)*N*2);
00079 fftContextForward = (FFTContext*)av_malloc(sizeof(FFTContext));
00080 memset(fftContextForward, 0, sizeof(FFTContext));
00081 fftContextReverse = (FFTContext*)av_malloc(sizeof(FFTContext));
00082 memset(fftContextReverse, 0, sizeof(FFTContext));
00083 ff_fft_init(fftContextForward, 13, 0);
00084 ff_fft_init(fftContextReverse, 13, 1);
00085 #endif
00086
00087 frontR.resize(N);
00088 frontL.resize(N);
00089 avg.resize(N);
00090 surR.resize(N);
00091 surL.resize(N);
00092 trueavg.resize(N);
00093 xfs.resize(N);
00094 yfs.resize(N);
00095 inbuf[0].resize(N);
00096 inbuf[1].resize(N);
00097 for (unsigned c=0;c<6;c++) {
00098 outbuf[c].resize(N);
00099 filter[c].resize(N);
00100 }
00101
00102 for (unsigned c=0;c<5;c++)
00103 {
00104 filter[c][0] = 0.0;
00105 filter[c][1] = 0.0;
00106 filter[c][halfN] = 0.0;
00107 }
00108 sample_rate(48000);
00109
00110 wnd.resize(N);
00111
00112
00113 surround_gain(1.0);
00114 current_buf = 0;
00115
00116 surround_coefficients(0.8165,0.5774);
00117 phase_mode(0);
00118 separation(1,1);
00119 steering_mode(1);
00120 }
00121
00122
00123 ~decoder_impl() {
00124 #ifdef USE_FFTW3
00125
00126 fftwf_destroy_plan(store);
00127 fftwf_destroy_plan(loadR);
00128 fftwf_destroy_plan(loadL);
00129 fftwf_free(src);
00130 fftwf_free(dftR);
00131 fftwf_free(dftL);
00132 fftwf_free(dst);
00133 fftwf_free(rt);
00134 fftwf_free(lt);
00135 #else
00136 ff_fft_end(fftContextForward);
00137 ff_fft_end(fftContextReverse);
00138 av_free(src);
00139 av_free(dftR);
00140 av_free(dftL);
00141 av_free(rt);
00142 av_free(lt);
00143 av_free(fftContextForward);
00144 av_free(fftContextReverse);
00145 #endif
00146 }
00147
00148 float ** getInputBuffers()
00149 {
00150 inbufs[0] = &inbuf[0][current_buf*halfN];
00151 inbufs[1] = &inbuf[1][current_buf*halfN];
00152 return inbufs;
00153 }
00154
00155 float ** getOutputBuffers()
00156 {
00157 outbufs[0] = &outbuf[0][current_buf*halfN];
00158 outbufs[1] = &outbuf[1][current_buf*halfN];
00159 outbufs[2] = &outbuf[2][current_buf*halfN];
00160 outbufs[3] = &outbuf[3][current_buf*halfN];
00161 outbufs[4] = &outbuf[4][current_buf*halfN];
00162 outbufs[5] = &outbuf[5][current_buf*halfN];
00163 return outbufs;
00164 }
00165
00166
00167
00168
00169
00170 void decode(float center_width, float dimension, float adaption_rate) {
00171
00172 int index;
00173 index = current_buf*halfN;
00174 float *in_second[2] = {&inbuf[0][index],&inbuf[1][index]};
00175 current_buf ^= 1;
00176 index = current_buf*halfN;
00177 float *in_first[2] = {&inbuf[0][index],&inbuf[1][index]};
00178 add_output(in_first,in_second,center_width,dimension,adaption_rate,true);
00179
00180 }
00181
00182
00183 void flush() {
00184 for (unsigned k=0;k<N;k++) {
00185 for (unsigned c=0;c<6;c++)
00186 outbuf[c][k] = 0;
00187 inbuf[0][k] = 0;
00188 inbuf[1][k] = 0;
00189 }
00190 }
00191
00192
00193 void sample_rate(unsigned int srate) {
00194
00195 unsigned int cutoff = (250*N)/srate;
00196 for (unsigned f=0;f<=halfN;f++) {
00197 if ((f>=2) && (f<cutoff))
00198 filter[5][f] = 1.0;
00199 else
00200 filter[5][f] = 0.0;
00201 }
00202 }
00203
00204
00205 void surround_coefficients(float a, float b) {
00206
00207 surround_high = a;
00208 surround_low = b;
00209 surround_balance = (a-b)/(a+b);
00210 surround_level = 1/(a+b);
00211
00212 cfloat i(0,1), u((a+b)*i), v((b-a)*i), n(0.25,0),o(1,0);
00213 A = (v-o)*n; B = (o-u)*n; C = (-o-v)*n; D = (o+u)*n;
00214 E = (o+v)*n; F = (o+u)*n; G = (o-v)*n; H = (o-u)*n;
00215 }
00216
00217 void surround_gain(float gain) {
00218 master_gain = gain * window_gain * 0.5 * 0.25;
00219 for (unsigned k=0;k<N;k++)
00220 wnd[k] = sqrt(master_gain*(1-cos(2*PI*k/N))/N);
00221 }
00222
00223
00224 void phase_mode(unsigned mode) {
00225 const float modes[4][2] = {{0,0},{0,PI},{PI,0},{-PI/2,PI/2}};
00226 phase_offsetL = modes[mode][0];
00227 phase_offsetR = modes[mode][1];
00228 }
00229
00230
00231 void steering_mode(bool mode) { linear_steering = mode; }
00232
00233
00234 void separation(float front, float rear) {
00235 front_separation = front;
00236 rear_separation = rear;
00237 }
00238
00239 private:
00240
00241 static inline float amplitude(const float cf[2]) { return sqrt(cf[0]*cf[0] + cf[1]*cf[1]); }
00242 static inline float phase(const float cf[2]) { return atan2(cf[1],cf[0]); }
00243 static inline cfloat polar(float a, float p) { return cfloat(a*cos(p),a*sin(p)); }
00244 static inline float sqr(float x) { return x*x; }
00245
00246 static inline float min(float a, float b) { return a<b?a:b; }
00247 static inline float max(float a, float b) { return a>b?a:b; }
00248 static inline float clamp(float x) { return max(-1,min(1,x)); }
00249
00250
00251 void add_output(float *input1[2], float *input2[2], float center_width, float dimension, float adaption_rate, bool result=false) {
00252
00253 float *out[6] = {&outbuf[0][0],&outbuf[1][0],&outbuf[2][0],&outbuf[3][0],&outbuf[4][0],&outbuf[5][0]};
00254 block_decode(input1,input2,out,center_width,dimension,adaption_rate);
00255 }
00256
00257
00258 void block_decode(float *input1[2], float *input2[2], float *output[6], float center_width, float dimension, float adaption_rate) {
00259
00260
00261
00262 {
00263 float* pWnd = &wnd[0];
00264 float* pLt = <[0];
00265 float* pRt = &rt[0];
00266 float* pIn0 = input1[0];
00267 float* pIn1 = input1[1];
00268 for (unsigned k=0;k<halfN;k++) {
00269 *pLt++ = *pIn0++ * *pWnd;
00270 *pRt++ = *pIn1++ * *pWnd++;
00271 }
00272 pIn0 = input2[0];
00273 pIn1 = input2[1];
00274 for (unsigned k=0;k<halfN;k++) {
00275 *pLt++ = *pIn0++ * *pWnd;
00276 *pRt++ = *pIn1++ * *pWnd++;
00277 }
00278 }
00279
00280 #ifdef USE_FFTW3
00281
00282 fftwf_execute(loadL);
00283 fftwf_execute(loadR);
00284 #else
00285 ff_fft_permuteRC(fftContextForward, lt, (FFTComplex*)&dftL[0]);
00286 ff_fft_permuteRC(fftContextForward, rt, (FFTComplex*)&dftR[0]);
00287 ff_fft_calc(fftContextForward, (FFTComplex*)&dftL[0]);
00288 ff_fft_calc(fftContextForward, (FFTComplex*)&dftR[0]);
00289 #endif
00290
00291
00292
00293 for (unsigned f=2;f<halfN;f++) {
00294
00295 float ampL = amplitude(dftL[f]), ampR = amplitude(dftR[f]);
00296 float phaseL = phase(dftL[f]), phaseR = phase(dftR[f]);
00297
00298
00299
00300
00301 float ampDiff = clamp((ampL+ampR < epsilon) ? 0 : (ampR-ampL) / (ampR+ampL));
00302 float phaseDiff = phaseL - phaseR;
00303 if (phaseDiff < -PI) phaseDiff += 2*PI;
00304 if (phaseDiff > PI) phaseDiff -= 2*PI;
00305 phaseDiff = abs(phaseDiff);
00306
00307 if (linear_steering) {
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346 yfs[f] = get_yfs(ampDiff,phaseDiff);
00347 xfs[f] = get_xfs(ampDiff,yfs[f]);
00348
00349
00350 yfs[f] = clamp(yfs[f] - dimension);
00351
00352
00353 xfs[f] = clamp(xfs[f] * (front_separation*(1+yfs[f])/2 + rear_separation*(1-yfs[f])/2));
00354
00355
00356 float left = (1-xfs[f])/2, right = (1+xfs[f])/2;
00357 float front = (1+yfs[f])/2, back = (1-yfs[f])/2;
00358 float volume[5] = {
00359 front * (left * center_width + max(0,-xfs[f]) * (1-center_width)),
00360 front * center_level*((1-abs(xfs[f])) * (1-center_width)),
00361 front * (right * center_width + max(0, xfs[f]) * (1-center_width)),
00362 back * surround_level * left,
00363 back * surround_level * right
00364 };
00365
00366
00367 for (unsigned c=0;c<5;c++)
00368 filter[c][f] = (1-adaption_rate)*filter[c][f] + adaption_rate*volume[c];
00369
00370 } else {
00371
00372
00373
00374 float ampDiff = clamp((ampL+ampR < epsilon) ? 0 : (ampR-ampL) / (ampR+ampL));
00375 float phaseDiff = phaseL - phaseR;
00376 if (phaseDiff < -PI) phaseDiff += 2*PI;
00377 if (phaseDiff > PI) phaseDiff -= 2*PI;
00378 phaseDiff = abs(phaseDiff);
00379
00380
00381 xfs[f] = ampDiff;
00382
00383
00384 yfs[f] = 1 - (phaseDiff/PI)*2;
00385
00386 if (abs(xfs[f]) > surround_balance) {
00387
00388
00389 float frontness = (abs(xfs[f]) - surround_balance)/(1-surround_balance);
00390 yfs[f] = (1-frontness) * yfs[f] + frontness * 1;
00391 }
00392
00393
00394 yfs[f] = clamp(yfs[f] - dimension);
00395
00396
00397 xfs[f] = clamp(xfs[f] * (front_separation*(1+yfs[f])/2 + rear_separation*(1-yfs[f])/2));
00398
00399
00400
00401 float left = (1-xfs[f])/2, right = (1+xfs[f])/2;
00402 float front = (1+yfs[f])/2, back = (1-yfs[f])/2;
00403 float volume[5] = {
00404 front * (left * center_width + max(0,-xfs[f]) * (1-center_width)),
00405 front * center_level*((1-abs(xfs[f])) * (1-center_width)),
00406 front * (right * center_width + max(0, xfs[f]) * (1-center_width)),
00407 back * surround_level*max(0,min(1,((1-(xfs[f]/surround_balance))/2))),
00408 back * surround_level*max(0,min(1,((1+(xfs[f]/surround_balance))/2)))
00409 };
00410
00411
00412 for (unsigned c=0;c<5;c++)
00413 filter[c][f] = (1-adaption_rate)*filter[c][f] + adaption_rate*volume[c];
00414 }
00415
00416
00417 frontL[f] = polar(ampL+ampR,phaseL);
00418 frontR[f] = polar(ampL+ampR,phaseR);
00419 avg[f] = frontL[f] + frontR[f];
00420 surL[f] = polar(ampL+ampR,phaseL+phase_offsetL);
00421 surR[f] = polar(ampL+ampR,phaseR+phase_offsetR);
00422 trueavg[f] = cfloat(dftL[f][0] + dftR[f][0], dftL[f][1] + dftR[f][1]);
00423 }
00424
00425
00426 apply_filter(&frontL[0],&filter[0][0],&output[0][0]);
00427 apply_filter(&avg[0], &filter[1][0],&output[1][0]);
00428 apply_filter(&frontR[0],&filter[2][0],&output[2][0]);
00429 apply_filter(&surL[0],&filter[3][0],&output[3][0]);
00430 apply_filter(&surR[0],&filter[4][0],&output[4][0]);
00431 apply_filter(&trueavg[0],&filter[5][0],&output[5][0]);
00432 }
00433
00434 #define FASTER_CALC
00435
00436 inline double get_yfs(double ampDiff, double phaseDiff) {
00437 double x = 1-(((1-sqr(ampDiff))*phaseDiff)/PI*2);
00438 #ifdef FASTER_CALC
00439 double tanX = tan(x);
00440 return 0.16468622925824683 + 0.5009268347818189*x - 0.06462757726992101*x*x
00441 + 0.09170680403453149*x*x*x + 0.2617754892323973*tanX - 0.04180413533856156*sqr(tanX);
00442 #else
00443 return 0.16468622925824683 + 0.5009268347818189*x - 0.06462757726992101*x*x
00444 + 0.09170680403453149*x*x*x + 0.2617754892323973*tan(x) - 0.04180413533856156*sqr(tan(x));
00445 #endif
00446 }
00447
00448
00449 inline double get_xfs(double ampDiff, double yfs) {
00450 double x=ampDiff,y=yfs;
00451 #ifdef FASTER_CALC
00452 double tanX = tan(x);
00453 double tanY = tan(y);
00454 double asinX = asin(x);
00455 double sinX = sin(x);
00456 double sinY = sin(y);
00457 double x3 = x*x*x;
00458 double y2 = y*y;
00459 double y3 = y*y2;
00460 return 2.464833559224702*x - 423.52131153259404*x*y +
00461 67.8557858606918*x3*y + 788.2429425544392*x*y2 -
00462 79.97650354902909*x3*y2 - 513.8966153850349*x*y3 +
00463 35.68117670186306*x3*y3 + 13867.406173420834*y*asinX -
00464 2075.8237075786396*y2*asinX - 908.2722068360281*y3*asinX -
00465 12934.654772878019*asinX*sinY - 13216.736529661162*y*tanX +
00466 1288.6463247741938*y2*tanX + 1384.372969378453*y3*tanX +
00467 12699.231471126128*sinY*tanX + 95.37131275594336*sinX*tanY -
00468 91.21223198407546*tanX*tanY;
00469 #else
00470 return 2.464833559224702*x - 423.52131153259404*x*y +
00471 67.8557858606918*x*x*x*y + 788.2429425544392*x*y*y -
00472 79.97650354902909*x*x*x*y*y - 513.8966153850349*x*y*y*y +
00473 35.68117670186306*x*x*x*y*y*y + 13867.406173420834*y*asin(x) -
00474 2075.8237075786396*y*y*asin(x) - 908.2722068360281*y*y*y*asin(x) -
00475 12934.654772878019*asin(x)*sin(y) - 13216.736529661162*y*tan(x) +
00476 1288.6463247741938*y*y*tan(x) + 1384.372969378453*y*y*y*tan(x) +
00477 12699.231471126128*sin(y)*tan(x) + 95.37131275594336*sin(x)*tan(y) -
00478 91.21223198407546*tan(x)*tan(y);
00479 #endif
00480 }
00481
00482
00483 void apply_filter(cfloat *signal, float *flt, float *target) {
00484
00485 unsigned f;
00486 for (f=0;f<=halfN;f++) {
00487 src[f][0] = signal[f].real() * flt[f];
00488 src[f][1] = signal[f].imag() * flt[f];
00489 }
00490 #ifdef USE_FFTW3
00491
00492 fftwf_execute(store);
00493
00494 float* pT1 = &target[current_buf*halfN];
00495 float* pWnd1 = &wnd[0];
00496 float* pDst1 = &dst[0];
00497 float* pT2 = &target[(current_buf^1)*halfN];
00498 float* pWnd2 = &wnd[halfN];
00499 float* pDst2 = &dst[halfN];
00500
00501 for (unsigned int k=0;k<halfN;k++)
00502 {
00503
00504 *pT1++ += *pWnd1++ * *pDst1++;
00505
00506 *pT2++ = *pWnd2++ * *pDst2++;
00507 }
00508 #else
00509
00510 for (f=1;f<halfN;f++) {
00511 src[N-f][0] = src[f][0];
00512 src[N-f][1] = -src[f][1];
00513 }
00514 ff_fft_permute(fftContextReverse, (FFTComplex*)&src[0]);
00515 ff_fft_calc(fftContextReverse, (FFTComplex*)&src[0]);
00516
00517 float* pT1 = &target[current_buf*halfN];
00518 float* pWnd1 = &wnd[0];
00519 float* pDst1 = &src[0][0];
00520 float* pT2 = &target[(current_buf^1)*halfN];
00521 float* pWnd2 = &wnd[halfN];
00522 float* pDst2 = &src[halfN][0];
00523
00524 for (unsigned int k=0;k<halfN;k++)
00525 {
00526
00527 *pT1++ += *pWnd1++ * *pDst1; pDst1 += 2;
00528
00529 *pT2++ = *pWnd2++ * *pDst2; pDst2 += 2;
00530 }
00531 #endif
00532 }
00533
00534 #ifndef USE_FFTW3
00535
00539 void ff_fft_permuteRC(FFTContext *s, FFTSample *r, FFTComplex *z)
00540 {
00541 int j, k, np;
00542 const uint16_t *revtab = s->revtab;
00543
00544
00545 np = 1 << s->nbits;
00546 for(j=0;j<np;j++) {
00547 k = revtab[j];
00548 z[k].re = r[j];
00549 z[k].im = 0.0;
00550 }
00551 }
00552
00558 void ff_fft_permuteCR(FFTContext *s, FFTComplex *z, FFTSample *r)
00559 {
00560 int j, k, np;
00561 FFTComplex tmp;
00562 const uint16_t *revtab = s->revtab;
00563
00564
00565 np = 1 << s->nbits;
00566 for(j=0;j<np;j++) {
00567 k = revtab[j];
00568 if (k < j) {
00569 r[k] = z[j].re;
00570 r[j] = z[k].re;
00571 }
00572 }
00573 }
00574 #endif
00575
00576 unsigned int N;
00577 unsigned int halfN;
00578 #ifdef USE_FFTW3
00579
00580 float *lt,*rt,*dst;
00581 fftwf_complex *dftL,*dftR,*src;
00582 fftwf_plan loadL,loadR,store;
00583 #else
00584 FFTContext *fftContextForward, *fftContextReverse;
00585 FFTSample *lt,*rt;
00586 FFTComplexArray *dftL,*dftR,*src;
00587 #endif
00588
00589 std::vector<cfloat> frontL,frontR,avg,surL,surR;
00590 std::vector<cfloat> trueavg;
00591 std::vector<float> xfs,yfs;
00592 std::vector<float> wnd;
00593 std::vector<float> filter[6];
00594 std::vector<float> inbuf[2];
00595 std::vector<float> outbuf[6];
00596
00597 float surround_high,surround_low;
00598 float surround_balance;
00599 float surround_level;
00600 float master_gain;
00601 float phase_offsetL, phase_offsetR;
00602 float front_separation;
00603 float rear_separation;
00604 bool linear_steering;
00605 cfloat A,B,C,D,E,F,G,H;
00606 int current_buf;
00607 float * inbufs[2];
00608 float * outbufs[6];
00609
00610 friend class fsurround_decoder;
00611 };
00612
00613
00614
00615
00616 fsurround_decoder::fsurround_decoder(unsigned blocksize): impl(new decoder_impl(blocksize)) { }
00617
00618 fsurround_decoder::~fsurround_decoder() { delete impl; }
00619
00620 void fsurround_decoder::decode(float center_width, float dimension, float adaption_rate) {
00621 impl->decode(center_width,dimension,adaption_rate);
00622 }
00623
00624 void fsurround_decoder::flush() { impl->flush(); }
00625
00626 void fsurround_decoder::surround_coefficients(float a, float b) { impl->surround_coefficients(a,b); }
00627
00628 void fsurround_decoder::gain(float gain) { impl->surround_gain(gain); }
00629
00630 void fsurround_decoder::phase_mode(unsigned mode) { impl->phase_mode(mode); }
00631
00632 void fsurround_decoder::steering_mode(bool mode) { impl->steering_mode(mode); }
00633
00634 void fsurround_decoder::separation(float front, float rear) { impl->separation(front,rear); }
00635
00636 float ** fsurround_decoder::getInputBuffers()
00637 {
00638 return impl->getInputBuffers();
00639 }
00640
00641 float ** fsurround_decoder::getOutputBuffers()
00642 {
00643 return impl->getOutputBuffers();
00644 }
00645
00646 void fsurround_decoder::sample_rate(unsigned int samplerate)
00647 {
00648 impl->sample_rate(samplerate);
00649 }