00001
00002
00003 #include <stdlib.h>
00004 #include <stdio.h>
00005 #include <unistd.h>
00006
00007 #include "mythconfig.h"
00008 #if HAVE_STDINT_H
00009 #include <stdint.h>
00010 #endif
00011
00012 #include <string.h>
00013 #include <math.h>
00014 #include <pthread.h>
00015
00016 #include "filter.h"
00017 #include "frame.h"
00018
00019 #include "mythlogging.h"
00020
00021 #include "../mm_arch.h"
00022
/*
 * Local arithmetic helpers.
 * ABS is undefined first, presumably because one of the headers included
 * above may already define a macro of that name.
 * Both macros evaluate their arguments more than once, so arguments must
 * not have side effects.
 */
00023 #undef ABS
/* Absolute value of A. */
00024 #define ABS(A) ( (A) > 0 ? (A) : -(A) )
/* Limits A to the inclusive range [L, U]. */
00025 #define CLAMP(A,L,U) ((A)>(U)?(U):((A)<(L)?(L):(A)))
00026
/*
 * Constants used by the MMX line filters (see mmx_start()/mmx_end()).
 * The threshold words are compared against the per-pixel difference
 * src3 - src2; the pair (-THRESHOLD, THRESHOLD - 1) selects the same
 * pixels as the C test "ABS(src3 - src2) > 11".
 */
00027 #if HAVE_MMX
00028 #include "ffmpeg-mmx.h"
00029 #define THRESHOLD 12
/* Four 16-bit words holding the lower bound (exclusive) of the "unchanged" range. */
00030 static const mmx_t mm_lthr = { w:{ -THRESHOLD, -THRESHOLD,
00031 -THRESHOLD, -THRESHOLD} };
/* Four 16-bit words holding the upper bound (inclusive) of the "unchanged" range. */
00032 static const mmx_t mm_hthr = { w:{ THRESHOLD - 1, THRESHOLD - 1,
00033 THRESHOLD - 1, THRESHOLD - 1} };
/* Constant pool; entry 0 is all zero bits and is used as the unpack operand. */
00034 static const mmx_t mm_cpool[] = { { 0x0000000000000000LL }, };
00035 #else
/* Without MMX support mmx_t is just a placeholder name. */
00036 #define mmx_t int
00037 #endif
00038
/* Book-keeping for one worker thread of the deinterlacer. */
00039 struct DeintThread
00040 {
/* Set to 1 by KernelDeint() when a slice is waiting; cleared by the worker when done. */
00041 int ready;
/* Thread handle filled in by pthread_create(). */
00042 pthread_t id;
/* 1 if pthread_create() succeeded for this slot, so the thread must be joined. */
00043 int exists;
00044 };
00045
/*
 * Per-instance state of the kernel deinterlacer.
 * A VideoFilter pointer handed to the callbacks is cast straight to
 * ThisFilter, so "vf" has to remain the first member.
 */
00046 typedef struct ThisFilter
00047 {
00048 VideoFilter vf;
00049
/* Worker thread table (requested_threads entries) or NULL when no workers are used. */
00050 struct DeintThread *threads;
/* Frame currently handed to the worker threads. */
00051 VideoFrame *frame;
/* Field value forwarded to filter_func() as its parity argument. */
00052 int field;
/* Number of worker slices still outstanding for the current frame. */
00053 int ready;
/* Set to 1 by the cleanup routine to make the workers leave their loop. */
00054 int kill_threads;
/* Number of workers that have started; each worker takes the old value as its slice index. */
00055 int actual_threads;
/* Number of workers asked for when the filter was created. */
00056 int requested_threads;
/* Guards the updates the workers make to actual_threads, ready and threads[].ready. */
00057 pthread_mutex_t mutex;
00058
/* When non-zero only the first plane is filtered. */
00059 int skipchroma;
/* CPU capability flags obtained from av_get_cpu_flags() (0 without MMX support). */
00060 int mm_flags;
/* Frame dimensions the reference planes were allocated for. */
00061 int width;
00062 int height;
/* Frame number seen by the previous KernelDeint() call. */
00063 long long last_framenr;
/* Private per-plane copies of the source frame and their row strides. */
00064 uint8_t *ref[3];
00065 int ref_stride[3];
00066
/* 1 when the same frame number is being filtered again, 0 for a new frame. */
00067 int dirty_frame;
/* Non-zero when the previous frame was filtered twice (two calls with one frame number). */
00068 int double_rate;
/* Set when the current frame number equals the previous one. */
00069 int double_call;
/* Line kernel used in double-rate mode (reads five source lines, writes dst). */
00070 void (*line_filter)(uint8_t *dst, int width, int start_width,
00071 uint8_t *src1, uint8_t *src2, uint8_t *src3,
00072 uint8_t *src4, uint8_t *src5);
/* Line kernel used in single-rate mode; its first source argument is a scratch line it also updates. */
00073 void (*line_filter_fast)(uint8_t *dst, int width, int start_width,
00074 uint8_t *src1, uint8_t *src2, uint8_t *src3,
00075 uint8_t *src4, uint8_t *src5);
/* Project macro; presumably adds the members used by TF_INIT/TF_START/TF_END -- confirm in filter.h. */
00076 TF_STRUCT;
00077 } ThisFilter;
00078
/*
 * Portable single-rate line kernel.
 *
 * For every column in [start_width, width):
 *   - the previous content of buf[] is remembered and buf[] is overwritten
 *     with the current src3 pixel (buf acts as a one-line history);
 *   - when src3 differs from src2 by more than 11, dst receives
 *     (4*src2 + 4*src4 + 2*src3 - old_buf - src5) / 8 limited to 0..255;
 *   - otherwise dst is left untouched.
 */
static void line_filter_c_fast(uint8_t *dst, int width, int start_width,
                               uint8_t *buf, uint8_t *src2, uint8_t *src3,
                               uint8_t *src4, uint8_t *src5)
{
    for (int col = start_width; col < width; ++col)
    {
        /* Swap the history line forward before anything else is read. */
        const uint8_t history = buf[col];
        buf[col] = src3[col];

        int delta = (int)src3[col] - (int)src2[col];
        if (delta <= 0)
            delta = -delta;
        if (delta <= 11)
            continue;

        int value = (src2[col] * 4 + src4[col] * 4
                     + src3[col] * 2 - history - src5[col]) / 8;
        if (value > 255)
            value = 255;
        else if (value < 0)
            value = 0;

        dst[col] = value;
    }
}
00095
/*
 * Portable double-rate line kernel.
 *
 * For every column in [start_width, width) the output pixel is either
 *   - (4*src2 + 4*src4 + 2*src3 - src1 - src5) / 8 limited to 0..255
 *     when src3 and src2 differ by more than 11, or
 *   - a plain copy of src3 otherwise.
 */
static void line_filter_c(uint8_t *dst, int width, int start_width,
                          uint8_t *src1, uint8_t *src2, uint8_t *src3,
                          uint8_t *src4, uint8_t *src5)
{
    int col = start_width;

    while (col < width)
    {
        const int centre = src3[col];
        const int gap    = centre - (int)src2[col];
        int result       = centre;

        if (gap > 11 || gap < -11)
        {
            result = (src2[col] * 4 + src4[col] * 4
                      + centre * 2 - src1[col] - src5[col]) / 8;
            if (result > 255)
                result = 255;
            else if (result < 0)
                result = 0;
        }

        dst[col] = result;
        col++;
    }
}
00111
00112 #if HAVE_MMX
/*
 * First half of the MMX line kernel for the 8 pixels starting at column X.
 *
 * Register state left for mmx_end():
 *   mm0/mm1  low/high four words of 4*(src2 + src4) + 2*src3
 *   mm2/mm3  low/high four words of src1 (zero-extended)
 *   mm4/mm5  the eight raw src3 bytes (both registers hold the same data)
 *   mm6/mm7  low/high four words of src2 (zero-extended)
 */
00113 static inline void mmx_start(uint8_t *src1, uint8_t *src2,
00114 uint8_t *src3, uint8_t *src4,
00115 int X)
00116 {
/* Load src2, src4 and src3; each is loaded twice for the low and high halves. */
00117 movq_m2r (src2[X], mm0);
00118 movq_m2r (src2[X], mm1);
00119 movq_m2r (src4[X], mm2);
00120 movq_m2r (src4[X], mm3);
00121 movq_m2r (src3[X], mm4);
00122 movq_m2r (src3[X], mm5);
/* Widen bytes to 16-bit words by interleaving with the zero constant. */
00123 punpcklbw_m2r (mm_cpool[0], mm0);
00124 punpckhbw_m2r (mm_cpool[0], mm1);
00125 punpcklbw_m2r (mm_cpool[0], mm2);
00126 punpckhbw_m2r (mm_cpool[0], mm3);
/* Keep the widened src2 for the threshold test in mmx_end(). */
00127 movq_r2r (mm0, mm6);
00128 movq_r2r (mm1, mm7);
/* mm0/mm1 = src2 + src4 */
00129 paddw_r2r (mm2, mm0);
00130 paddw_r2r (mm3, mm1);
00131 movq_m2r (src3[X], mm2);
00132 movq_m2r (src3[X], mm3);
/* mm0/mm1 = 4 * (src2 + src4) */
00133 psllw_i2r (2, mm0);
00134 psllw_i2r (2, mm1);
00135 punpcklbw_m2r (mm_cpool[0], mm2);
00136 punpckhbw_m2r (mm_cpool[0], mm3);
/* mm2/mm3 = 2 * src3, added onto the running sum. */
00137 psllw_i2r (1, mm2);
00138 psllw_i2r (1, mm3);
00139 paddw_r2r (mm2, mm0);
00140 paddw_r2r (mm3, mm1);
/* Finally load and widen src1; the caller may overwrite src1[X] after this point. */
00141 movq_m2r (src1[X], mm2);
00142 movq_m2r (src1[X], mm3);
00143 punpcklbw_m2r (mm_cpool[0], mm2);
00144 punpckhbw_m2r (mm_cpool[0], mm3);
00145 }
00146
/*
 * Second half of the MMX line kernel; expects the register state left by
 * mmx_start() and stores eight result bytes to dst[X].
 *
 * Per pixel the result is the filtered value
 * (4*src2 + 4*src4 + 2*src3 - src1 - src5) >> 3 (subtractions saturate at 0,
 * the byte pack saturates at 255) when src3 - src2 lies outside
 * (-THRESHOLD, THRESHOLD - 1], and the original src3 byte otherwise.
 */
00147 static inline void mmx_end(uint8_t *src3, uint8_t *src5,
00148 uint8_t *dst, int X)
00149 {
/* Widen the raw src3 bytes kept in mm4/mm5. */
00150 punpcklbw_m2r (mm_cpool[0], mm4);
00151 punpckhbw_m2r (mm_cpool[0], mm5);
/* Subtract src1 with unsigned saturation. */
00152 psubusw_r2r (mm2, mm0);
00153 psubusw_r2r (mm3, mm1);
/* Load, widen and subtract src5, again saturating at zero. */
00154 movq_m2r (src5[X], mm2);
00155 movq_m2r (src5[X], mm3);
00156 punpcklbw_m2r (mm_cpool[0], mm2);
00157 punpckhbw_m2r (mm_cpool[0], mm3);
00158 psubusw_r2r (mm2, mm0);
00159 psubusw_r2r (mm3, mm1);
/* Divide the sum by 8. */
00160 psrlw_i2r (3, mm0);
00161 psrlw_i2r (3, mm1);
/* mm4/mm5 = src3 - src2 (signed words) for the threshold test. */
00162 psubw_r2r (mm6, mm4);
00163 psubw_r2r (mm7, mm5);
/* mm0 = eight filtered bytes, saturated to 0..255. */
00164 packuswb_r2r (mm1,mm0);
00165 movq_r2r (mm4, mm6);
00166 movq_r2r (mm5, mm7);
/* mm4/mm5 = (diff > -THRESHOLD), mm6/mm7 = (diff > THRESHOLD - 1). */
00167 pcmpgtw_m2r (mm_lthr, mm4);
00168 pcmpgtw_m2r (mm_lthr, mm5);
00169 pcmpgtw_m2r (mm_hthr, mm6);
00170 pcmpgtw_m2r (mm_hthr, mm7);
/* Narrow the word masks to byte masks. */
00171 packsswb_r2r (mm5, mm4);
00172 packsswb_r2r (mm7, mm6);
/* XOR of the two masks is set exactly where the difference is within the threshold. */
00173 pxor_r2r (mm6, mm4);
00174 movq_r2r (mm4, mm5);
/* mm4 = filtered bytes where outside the threshold, mm5 = src3 bytes where inside. */
00175 pandn_r2r (mm0, mm4);
00176 pand_m2r (src3[X], mm5);
/* Merge both selections and store the eight output bytes. */
00177 por_r2r (mm4, mm5);
00178 movq_r2m (mm5, dst[X]);
00179 }
00180
00181 static void line_filter_mmx_fast(uint8_t *dst, int width, int start_width,
00182 uint8_t *buf, uint8_t *src2, uint8_t *src3,
00183 uint8_t *src4, uint8_t *src5)
00184 {
00185 int X;
00186 for (X = start_width; X < width - 7; X += 8)
00187 {
00188 mmx_start(buf, src2, src3, src4, X);
00189 movq_r2m (mm4, buf[X]);
00190 mmx_end(src3, src5, dst, X);
00191 }
00192
00193 line_filter_c_fast(dst, width, X, buf, src2, src3, src4, src5);
00194 }
00195
/*
 * MMX double-rate line kernel.
 * Processes eight pixels per iteration with mmx_start()/mmx_end() and hands
 * the remaining columns (fewer than eight) to line_filter_c().
 */
static void line_filter_mmx(uint8_t *dst, int width, int start_width,
                            uint8_t *src1, uint8_t *src2, uint8_t *src3,
                            uint8_t *src4, uint8_t *src5)
{
    const int simd_limit = width - 7;
    int col = start_width;

    while (col < simd_limit)
    {
        mmx_start(src1, src2, src3, src4, col);
        mmx_end(src3, src5, dst, col);
        col += 8;
    }

    /* Scalar tail starting where the vector loop stopped. */
    line_filter_c(dst, width, col, src1, src2, src3, src4, src5);
}
00209 #endif
00210
00211 static void store_ref(struct ThisFilter *p, uint8_t *src, int src_offsets[3],
00212 int src_stride[3], int width, int height)
00213 {
00214 int i;
00215 for (i = 0; i < 3; i++)
00216 {
00217 if (src_stride[i] < 1)
00218 continue;
00219
00220 int is_chroma = !!i;
00221 int h = height >> is_chroma;
00222 int w = width >> is_chroma;
00223
00224 if (p->ref_stride[i] == src_stride[i])
00225 {
00226 memcpy(p->ref[i], src + src_offsets[i], src_stride[i] * h);
00227 }
00228 else
00229 {
00230 int j;
00231 uint8_t *src2 = src + src_offsets[i];
00232 uint8_t *dest = p->ref[i];
00233 for (j = 0; j < h; j++)
00234 {
00235 memcpy(dest, src2, w);
00236 src2 += src_stride[i];
00237 dest += p->ref_stride[i];
00238 }
00239 }
00240 }
00241 }
00242
00243 static int AllocFilter(ThisFilter* filter, int width, int height)
00244 {
00245 if ((width != filter->width) || (height != filter->height))
00246 {
00247 int i;
00248 for (i = 0; i < 3; i++)
00249 {
00250 if (filter->ref[i])
00251 free(filter->ref[i]);
00252
00253 int is_chroma= !!i;
00254 int w = ((width + 31) & (~31)) >> is_chroma;
00255 int h = ((height + 6 + 31) & (~31)) >> is_chroma;
00256 int size = w * h * sizeof(uint8_t);
00257
00258 filter->ref_stride[i] = w;
00259 filter->ref[i] = (uint8_t*) malloc(size);
00260 if (!filter->ref[i])
00261 return 0;
00262 memset(filter->ref[i], is_chroma ? 127 : 0, size);
00263 }
00264 filter->width = width;
00265 filter->height = height;
00266 }
00267 return 1;
00268 }
00269
/*
 * Deinterlaces one horizontal slice of a frame in place.
 *
 * p             filter instance (reference planes, line kernels, flags)
 * dst           frame buffer that is modified
 * dst_offsets   per-plane byte offsets into dst
 * dst_stride    per-plane row strides of dst
 * width/height  luma dimensions; planes 1 and 2 use half of each
 * parity, tff   combined by XOR into the field whose lines are rebuilt
 * double_rate   non-zero: read the five-line window from p->ref;
 *               zero: read it from dst itself with a one-line scratch buffer
 * dirty         non-zero: lines that are not filtered are copied back from
 *               the reference planes (double-rate mode only)
 * this_slice,
 * total_slices  slice index and count; slicing is only honoured in
 *               double-rate mode
 *
 * Does nothing for frames lower than 8 rows or a slice count below 1.
 */
00270 static void filter_func(struct ThisFilter *p, uint8_t *dst, int dst_offsets[3],
00271 int dst_stride[3], int width, int height, int parity,
00272 int tff, int double_rate, int dirty,
00273 int this_slice, int total_slices)
00274 {
00275 if (height < 8 || total_slices < 1)
00276 return;
00277
/* Single-rate mode always processes the whole frame as one slice. */
00278 if (total_slices > 1 && !double_rate)
00279 {
00280 this_slice = 0;
00281 total_slices = 1;
00282 }
00283
00284 int i, y;
00285 uint8_t *dest, *src1, *src2, *src3, *src4, *src5;
00286 int channels = p->skipchroma ? 1 : 3;
00287 int field = parity ^ tff;
00288
00289 int first_slice = (this_slice == 0);
00290 int last_slice = 0;
/* Slice height is rounded down to an even number of rows. */
00291 int slice_height = height / total_slices;
00292 slice_height = (slice_height >> 1) << 1;
00293 int starth = slice_height * this_slice;
00294 int endh = starth + slice_height;
00295
/* The final slice also takes the rows left over by the rounding above. */
00296 if ((this_slice + 1) >= total_slices)
00297 {
00298 endh = height;
00299 last_slice = 1;
00300 }
00301
00302 for (i = 0; i < channels; i++)
00303 {
00304
00305 int is_chroma = !!i;
00306 int w = width >> is_chroma;
00307 int start = starth >> is_chroma;
00308 int end = endh >> is_chroma;
00309
/* Inner slices begin two rows early so the five-line window is centred on their first row. */
00310 if (!first_slice)
00311 start -= 2;
/* The bottom rows of the frame are handled separately after the main loop. */
00312 if (last_slice)
00313 end -= (5 + field);
00314
00315 int src_pitch = p->ref_stride[i];
00316 dest = dst + dst_offsets[i] + (start * dst_stride[i]);
00317 src1 = p->ref[i] + (start * src_pitch);
00318
00319 if (double_rate)
00320 {
/* Five consecutive rows of the reference plane form the filter window. */
00321 src2 = src1 + src_pitch;
00322 src3 = src2 + src_pitch;
00323 src4 = src3 + src_pitch;
00324 src5 = src4 + src_pitch;
00325
00326 if (first_slice)
00327 {
/* Top two rows: rows above the frame are replaced by repeating the first row. */
00328 if (!field)
00329 p->line_filter(dest, w, 0, src1, src1, src1, src2, src3);
00330 else if (dirty)
00331 memcpy(dest, src1, w);
00332 dest += dst_stride[i];
00333
00334 if (field)
00335 p->line_filter(dest, w, 0, src1, src1, src2, src3, src4);
00336 else if (dirty)
00337 memcpy(dest, src2, w);
00338 dest += dst_stride[i];
00339 }
00340 else
00341 {
/* Skip the two lead-in rows; dest now lines up with src3. */
00342 dest += dst_stride[i] << 1;
00343 }
00344
/* Main loop: rows of one parity are filtered, the others are restored from the reference only when dirty. */
00345 for (y = start; y < end; y++)
00346 {
00347 if ((y ^ (1 - field)) & 1)
00348 p->line_filter(dest, w, 0, src1, src2, src3, src4, src5);
00349 else if (dirty)
00350 memcpy(dest, src3, w);
00351
/* Slide the window down by one row. */
00352 dest += dst_stride[i];
00353 src1 = src2;
00354 src2 = src3;
00355 src3 = src4;
00356 src4 = src5;
00357 src5 += src_pitch;
00358 }
00359
00360 if (last_slice)
00361 {
/* Bottom two rows: rows below the window are replaced by repeating src5. */
00362 if (!field)
00363 p->line_filter(dest, w, 0, src2, src3, src4, src5, src5);
00364 else if (dirty)
00365 memcpy(dest, src4, w);
00366 dest += dst_stride[i];
00367
00368 if (field)
00369 p->line_filter(dest, w, 0, src3, src4, src5, src5, src5);
00370 else if (dirty)
00371 memcpy(dest, src5, w);
00372 }
00373 }
00374 else
00375 {
/* Single-rate mode: the window rows are taken from the destination plane itself and only every second row is visited. */
00376 int field_stride = dst_stride[i] << 1;
00377 src2 = dest + dst_stride[i];
00378 src3 = src2 + dst_stride[i];
00379 src4 = src3 + dst_stride[i];
00380 src5 = src4 + dst_stride[i];
/* src1 (a row of the reference plane) serves as the scratch line updated by line_filter_fast; seed it with the first row. */
00381 memcpy(src1, dest, w);
00382
00383 if (field)
00384 {
00385 dest += dst_stride[i];
00386 p->line_filter_fast(dest, w, 0, src1, src2, src2, src3, src4);
00387 src2 = src3;
00388 src3 = src4;
00389 src4 = src5;
00390 src5 += dst_stride[i];
00391 }
00392 else
00393 {
00394 p->line_filter_fast(dest, w, 0, src1, src2, src2, src2, src3);
00395 }
00396 dest += field_stride;
00397
/* Step two rows at a time; src1 is not advanced because the kernel refreshes it on every call. */
00398 for (y = start; y < end; y += 2)
00399 {
00400 p->line_filter_fast(dest, w, 0, src1, src2, src3, src4, src5);
00401 dest += field_stride;
00402 src2 = src4;
00403 src3 = src5;
00404 src4 += field_stride;
00405 src5 += field_stride;
00406 }
00407
/* Final row of the field, with the lowest window rows repeated. */
00408 if (field)
00409 p->line_filter_fast(dest, w, 0, src1, src4, src5, src5, src5);
00410 else
00411 p->line_filter_fast(dest, w, 0, src1, src3, src4, src5, src5);
00412 }
00413 }
/* Leave MMX state so that later floating-point code works again. */
00414 #if HAVE_MMX
00415 if (p->mm_flags & AV_CPU_FLAG_MMX)
00416 emms();
00417 #endif
00418 }
00419
00420 static void *KernelThread(void *args)
00421 {
00422 ThisFilter *filter = (ThisFilter*)args;
00423
00424 pthread_mutex_lock(&(filter->mutex));
00425 int num = filter->actual_threads;
00426 filter->actual_threads = num + 1;
00427 pthread_mutex_unlock(&(filter->mutex));
00428
00429 while (!filter->kill_threads)
00430 {
00431 usleep(1000);
00432 if (filter->ready &&
00433 filter->frame != NULL &&
00434 filter->threads[num].ready)
00435 {
00436 filter_func(
00437 filter, filter->frame->buf, filter->frame->offsets,
00438 filter->frame->pitches, filter->frame->width,
00439 filter->frame->height, filter->field,
00440 filter->frame->top_field_first, filter->double_rate,
00441 filter->dirty_frame, num, filter->actual_threads);
00442
00443 pthread_mutex_lock(&(filter->mutex));
00444 filter->ready = filter->ready - 1;
00445 filter->threads[num].ready = 0;
00446 pthread_mutex_unlock(&(filter->mutex));
00447 }
00448 }
00449 pthread_exit(NULL);
00450 return NULL;
00451 }
00452
00453 static int KernelDeint(VideoFilter *f, VideoFrame *frame, int field)
00454 {
00455 ThisFilter *filter = (ThisFilter *) f;
00456 TF_VARS;
00457
00458 if (!AllocFilter(filter, frame->width, frame->height))
00459 {
00460 LOG(VB_GENERAL, LOG_ERR, "KernelDeint: failed to allocate buffers.");
00461 return -1;
00462 }
00463
00464 TF_START;
00465
00466 filter->dirty_frame = 1;
00467 if (filter->last_framenr == frame->frameNumber)
00468 {
00469 filter->double_call = 1;
00470 }
00471 else
00472 {
00473 filter->double_rate = filter->double_call;
00474 filter->double_call = 0;
00475 filter->dirty_frame = 0;
00476 if (filter->double_rate)
00477 {
00478 store_ref(filter, frame->buf, frame->offsets,
00479 frame->pitches, frame->width, frame->height);
00480 }
00481 }
00482
00483 if (filter->actual_threads > 1 && filter->double_rate)
00484 {
00485 int i;
00486 for (i = 0; i < filter->actual_threads; i++)
00487 filter->threads[i].ready = 1;
00488 filter->frame = frame;
00489 filter->field = field;
00490 filter->ready = filter->actual_threads;
00491 i = 0;
00492 while (filter->ready > 0 && i < 1000)
00493 {
00494 usleep(1000);
00495 i++;
00496 }
00497 }
00498 else
00499 {
00500 filter_func(
00501 filter, frame->buf, frame->offsets, frame->pitches,
00502 frame->width, frame->height, field, frame->top_field_first,
00503 filter->double_rate, filter->dirty_frame, 0, 1);
00504 }
00505
00506 filter->last_framenr = frame->frameNumber;
00507
00508 TF_END(filter, "KernelDeint: ");
00509
00510 return 0;
00511 }
00512
00513 static void CleanupKernelDeintFilter(VideoFilter *f)
00514 {
00515 ThisFilter *filter = (ThisFilter *) f;
00516
00517 int i;
00518 for (i = 0; i < 3; i++)
00519 {
00520 uint8_t **p= &filter->ref[i];
00521 if (*p)
00522 free(*p);
00523 *p= NULL;
00524 }
00525
00526 if (filter->threads != NULL)
00527 {
00528 filter->kill_threads = 1;
00529 for (i = 0; i < filter->requested_threads; i++)
00530 if (filter->threads[i].exists)
00531 pthread_join(filter->threads[i].id, NULL);
00532 free(filter->threads);
00533 }
00534 }
00535
00536 static VideoFilter *NewKernelDeintFilter(VideoFrameType inpixfmt,
00537 VideoFrameType outpixfmt,
00538 int *width, int *height,
00539 char *options, int threads)
00540 {
00541 ThisFilter *filter;
00542 (void) options;
00543 (void) height;
00544 (void) threads;
00545
00546 if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
00547 {
00548 LOG(VB_GENERAL, LOG_ERR, "KernelDeint: valid formats are YV12->YV12");
00549 return NULL;
00550 }
00551
00552 filter = (ThisFilter *) malloc (sizeof(ThisFilter));
00553 if (filter == NULL)
00554 {
00555 LOG(VB_GENERAL, LOG_ERR,
00556 "KernelDeint: failed to allocate memory for filter.");
00557 return NULL;
00558 }
00559
00560 filter->mm_flags = 0;
00561 filter->line_filter = &line_filter_c;
00562 filter->line_filter_fast = &line_filter_c_fast;
00563 #if HAVE_MMX
00564 filter->mm_flags = av_get_cpu_flags();
00565 if (filter->mm_flags & AV_CPU_FLAG_MMX)
00566 {
00567 filter->line_filter = &line_filter_mmx;
00568 filter->line_filter_fast = &line_filter_mmx_fast;
00569 }
00570 #endif
00571
00572 filter->skipchroma = 0;
00573 filter->width = 0;
00574 filter->height = 0;
00575 filter->last_framenr = -1;
00576 filter->double_call = 0;
00577 filter->double_rate = 1;
00578 memset(filter->ref, 0, sizeof(filter->ref));
00579 if (!AllocFilter(filter, *width, *height))
00580 {
00581 LOG(VB_GENERAL, LOG_ERR, "KernelDeint: failed to allocate buffers.");
00582 free (filter);
00583 return NULL;
00584 }
00585
00586 TF_INIT(filter);
00587
00588 filter->vf.filter = &KernelDeint;
00589 filter->vf.cleanup = &CleanupKernelDeintFilter;
00590
00591 filter->frame = NULL;
00592 filter->field = 0;
00593 filter->ready = 0;
00594 filter->kill_threads = 0;
00595 filter->actual_threads = 0;
00596 filter->requested_threads = threads;
00597 filter->threads = NULL;
00598
00599 if (filter->requested_threads > 1)
00600 {
00601 filter->threads = (struct DeintThread *) calloc(threads,
00602 sizeof(struct DeintThread));
00603 if (filter->threads == NULL)
00604 {
00605 LOG(VB_GENERAL, LOG_ERR, "KernelDeint: failed to allocate memory "
00606 "for threads - falling back to existing, single thread.");
00607 filter->requested_threads = 1;
00608 }
00609 }
00610
00611 if (filter->requested_threads > 1)
00612 {
00613 pthread_mutex_init(&(filter->mutex), NULL);
00614 int success = 0;
00615 for (int i = 0; i < filter->requested_threads; i++)
00616 {
00617 if (pthread_create(&(filter->threads[i].id), NULL,
00618 KernelThread, (void*)filter) != 0)
00619 filter->threads[i].exists = 0;
00620 else
00621 {
00622 success++;
00623 filter->threads[i].exists = 1;
00624 }
00625 }
00626
00627 if (success < filter->requested_threads)
00628 {
00629 LOG(VB_GENERAL, LOG_NOTICE,
00630 "KernelDeint: failed to create all threads - "
00631 "falling back to existing, single thread.");
00632 }
00633 else
00634 {
00635 int timeout = 0;
00636 while (filter->actual_threads != filter->requested_threads)
00637 {
00638 timeout++;
00639 if (timeout > 5000)
00640 {
00641 LOG(VB_GENERAL, LOG_NOTICE,
00642 "KernelDeint: waited too long for "
00643 "threads to start.- continuing.");
00644 break;
00645 }
00646 usleep(1000);
00647 }
00648 LOG(VB_PLAYBACK, LOG_INFO, "KernelDeint: Created threads.");
00649 }
00650 }
00651
00652 if (filter->actual_threads < 1 )
00653 LOG(VB_PLAYBACK, LOG_INFO, "KernelDeint: Using existing thread.");
00654
00655 return (VideoFilter *) filter;
00656 }
00657
/* Supported input/output pixel format pairs: YV12 to YV12 only; FMT_NULL ends the list. */
00658 static FmtConv FmtList[] =
00659 {
00660 { FMT_YV12, FMT_YV12 },
00661 FMT_NULL
00662 };
00663
/*
 * Filters exported by this module.  Both names are created by the same
 * factory function and accept the same formats; FILT_NULL ends the table.
 */
00664 ConstFilterInfo filter_table[] =
00665 {
00666 {
00667 filter_init: &NewKernelDeintFilter,
00668 name: "kerneldeint",
00669 descript: "combines data from several fields to deinterlace "
00670 "with less motion blur",
00671 formats: FmtList,
00672 libname: NULL
00673 },
00674 {
00675 filter_init: &NewKernelDeintFilter,
00676 name: "kerneldoubleprocessdeint",
00677 descript: "combines data from several fields to deinterlace "
00678 "with less motion blur",
00679 formats: FmtList,
00680 libname: NULL
00681 },
00682 FILT_NULL
00683 };