avcodec/pthread_slice: use slice threading from avutil

Also remove pthread_cond_broadcast(progress_cond) on uninit.
Broadcasting it is not required because workers are always
parked when they are not in thread_execute. So it is impossible
for a worker to be waiting on progress_cond during uninit.

Benchmark:
./ffmpeg -threads $threads -thread_type slice -i 10slices.mp4 -f null null
threads=2:
  old: 70.212s 70.525s 70.877s
  new: 65.219s 65.377s 65.484s
threads=3:
  old: 65.086s 66.306s 66.409s
  new: 63.229s 65.026s 65.116s
threads=4:
  old: 60.993s 61.482s 62.123s
  new: 59.224s 59.441s 59.667s
threads=5:
  old: 57.576s 57.860s 58.832s
  new: 53.032s 53.948s 54.086s

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
This commit is contained in:
Muhammad Faiz 2017-07-12 06:58:07 +07:00
parent a3d3cfa65c
commit b505f15b15

@ -34,26 +34,19 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "libavutil/thread.h" #include "libavutil/thread.h"
#include "libavutil/slicethread.h"
typedef int (action_func)(AVCodecContext *c, void *arg); typedef int (action_func)(AVCodecContext *c, void *arg);
typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr);
typedef struct SliceThreadContext { typedef struct SliceThreadContext {
pthread_t *workers; AVSliceThread *thread;
action_func *func; action_func *func;
action_func2 *func2; action_func2 *func2;
void *args; void *args;
int *rets; int *rets;
int job_count;
int job_size; int job_size;
pthread_cond_t last_job_cond;
pthread_cond_t current_job_cond;
pthread_mutex_t current_job_lock;
unsigned current_execute;
int current_job;
int done;
int *entries; int *entries;
int entries_count; int entries_count;
int thread_count; int thread_count;
@ -61,43 +54,16 @@ typedef struct SliceThreadContext {
pthread_mutex_t *progress_mutex; pthread_mutex_t *progress_mutex;
} SliceThreadContext; } SliceThreadContext;
static void* attribute_align_arg worker(void *v) static void worker_func(void *priv, int jobnr, int threadnr, int nb_jobs, int nb_threads)
{ {
AVCodecContext *avctx = v; AVCodecContext *avctx = priv;
SliceThreadContext *c = avctx->internal->thread_ctx; SliceThreadContext *c = avctx->internal->thread_ctx;
unsigned last_execute = 0;
int our_job = c->job_count;
int thread_count = avctx->thread_count;
int self_id;
pthread_mutex_lock(&c->current_job_lock);
self_id = c->current_job++;
for (;;){
int ret; int ret;
while (our_job >= c->job_count) {
if (c->current_job == thread_count + c->job_count)
pthread_cond_signal(&c->last_job_cond);
while (last_execute == c->current_execute && !c->done) ret = c->func ? c->func(avctx, (char *)c->args + c->job_size * jobnr)
pthread_cond_wait(&c->current_job_cond, &c->current_job_lock); : c->func2(avctx, c->args, jobnr, threadnr);
last_execute = c->current_execute;
our_job = self_id;
if (c->done) {
pthread_mutex_unlock(&c->current_job_lock);
return NULL;
}
}
pthread_mutex_unlock(&c->current_job_lock);
ret = c->func ? c->func(avctx, (char*)c->args + our_job*c->job_size):
c->func2(avctx, c->args, our_job, self_id);
if (c->rets) if (c->rets)
c->rets[our_job%c->job_count] = ret; c->rets[jobnr] = ret;
pthread_mutex_lock(&c->current_job_lock);
our_job = c->current_job++;
}
} }
void ff_slice_thread_free(AVCodecContext *avctx) void ff_slice_thread_free(AVCodecContext *avctx)
@ -105,40 +71,19 @@ void ff_slice_thread_free(AVCodecContext *avctx)
SliceThreadContext *c = avctx->internal->thread_ctx; SliceThreadContext *c = avctx->internal->thread_ctx;
int i; int i;
pthread_mutex_lock(&c->current_job_lock); avpriv_slicethread_free(&c->thread);
c->done = 1;
pthread_cond_broadcast(&c->current_job_cond);
for (i = 0; i < c->thread_count; i++)
pthread_cond_broadcast(&c->progress_cond[i]);
pthread_mutex_unlock(&c->current_job_lock);
for (i=0; i<avctx->thread_count; i++)
pthread_join(c->workers[i], NULL);
for (i = 0; i < c->thread_count; i++) { for (i = 0; i < c->thread_count; i++) {
pthread_mutex_destroy(&c->progress_mutex[i]); pthread_mutex_destroy(&c->progress_mutex[i]);
pthread_cond_destroy(&c->progress_cond[i]); pthread_cond_destroy(&c->progress_cond[i]);
} }
pthread_mutex_destroy(&c->current_job_lock);
pthread_cond_destroy(&c->current_job_cond);
pthread_cond_destroy(&c->last_job_cond);
av_freep(&c->entries); av_freep(&c->entries);
av_freep(&c->progress_mutex); av_freep(&c->progress_mutex);
av_freep(&c->progress_cond); av_freep(&c->progress_cond);
av_freep(&c->workers);
av_freep(&avctx->internal->thread_ctx); av_freep(&avctx->internal->thread_ctx);
} }
static av_always_inline void thread_park_workers(SliceThreadContext *c, int thread_count)
{
while (c->current_job != thread_count + c->job_count)
pthread_cond_wait(&c->last_job_cond, &c->current_job_lock);
pthread_mutex_unlock(&c->current_job_lock);
}
static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size) static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size)
{ {
SliceThreadContext *c = avctx->internal->thread_ctx; SliceThreadContext *c = avctx->internal->thread_ctx;
@ -149,19 +94,12 @@ static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, i
if (job_count <= 0) if (job_count <= 0)
return 0; return 0;
pthread_mutex_lock(&c->current_job_lock);
c->current_job = avctx->thread_count;
c->job_count = job_count;
c->job_size = job_size; c->job_size = job_size;
c->args = arg; c->args = arg;
c->func = func; c->func = func;
c->rets = ret; c->rets = ret;
c->current_execute++;
pthread_cond_broadcast(&c->current_job_cond);
thread_park_workers(c, avctx->thread_count);
avpriv_slicethread_execute(c->thread, job_count, 0);
return 0; return 0;
} }
@ -174,7 +112,6 @@ static int thread_execute2(AVCodecContext *avctx, action_func2* func2, void *arg
int ff_slice_thread_init(AVCodecContext *avctx) int ff_slice_thread_init(AVCodecContext *avctx)
{ {
int i;
SliceThreadContext *c; SliceThreadContext *c;
int thread_count = avctx->thread_count; int thread_count = avctx->thread_count;
@ -204,35 +141,16 @@ int ff_slice_thread_init(AVCodecContext *avctx)
return 0; return 0;
} }
c = av_mallocz(sizeof(SliceThreadContext)); avctx->internal->thread_ctx = c = av_mallocz(sizeof(*c));
if (!c) if (!c || (thread_count = avpriv_slicethread_create(&c->thread, avctx, worker_func, NULL, thread_count)) <= 1) {
return -1; if (c)
avpriv_slicethread_free(&c->thread);
c->workers = av_mallocz_array(thread_count, sizeof(pthread_t)); av_freep(&avctx->internal->thread_ctx);
if (!c->workers) { avctx->thread_count = 1;
av_free(c); avctx->active_thread_type = 0;
return -1; return 0;
} }
avctx->thread_count = thread_count;
avctx->internal->thread_ctx = c;
c->current_job = 0;
c->job_count = 0;
c->job_size = 0;
c->done = 0;
pthread_cond_init(&c->current_job_cond, NULL);
pthread_cond_init(&c->last_job_cond, NULL);
pthread_mutex_init(&c->current_job_lock, NULL);
pthread_mutex_lock(&c->current_job_lock);
for (i=0; i<thread_count; i++) {
if(pthread_create(&c->workers[i], NULL, worker, avctx)) {
avctx->thread_count = i;
pthread_mutex_unlock(&c->current_job_lock);
ff_thread_free(avctx);
return -1;
}
}
thread_park_workers(c, thread_count);
avctx->execute = thread_execute; avctx->execute = thread_execute;
avctx->execute2 = thread_execute2; avctx->execute2 = thread_execute2;