llvmpipe: add compute threadpool + mutex

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
In order to efficiently run a number of compute blocks, use
a threadpool that just allows for jobs with unique sequential
ids to be dispatched.
This commit is contained in:
Dave Airlie 2019-08-27 12:45:39 +10:00
parent e5bf6b7013
commit 1b24e3ba75
6 changed files with 256 additions and 2 deletions

View File

@ -13,6 +13,8 @@ C_SOURCES := \
lp_clear.h \
lp_context.c \
lp_context.h \
lp_cs_tpool.c \
lp_cs_tpool.h \
lp_debug.h \
lp_draw_arrays.c \
lp_fence.c \

View File

@ -0,0 +1,153 @@
/**************************************************************************
*
* Copyright 2019 Red Hat.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**************************************************************************/
/**
* compute shader thread pool.
* based on threadpool.c but modified heavily to be compute shader tuned.
*/
#include "util/u_thread.h"
#include "util/u_memory.h"
#include "lp_cs_tpool.h"
/**
 * Worker thread entry point.
 *
 * Each worker owns one lp_cs_local_mem scratch struct that is handed to
 * every work callback it runs.  The worker repeatedly claims the next
 * iteration index of the task at the head of the work queue, runs the
 * callback with the pool mutex dropped, and then records the iteration
 * as finished.  The loop ends once pool->shutdown is observed.
 *
 * \param data  the struct lp_cs_tpool this worker belongs to
 * \return always 0
 */
static int
lp_cs_tpool_worker(void *data)
{
   struct lp_cs_tpool *pool = data;
   struct lp_cs_local_mem lmem;

   memset(&lmem, 0, sizeof(lmem));

   mtx_lock(&pool->m);
   for (;;) {
      struct lp_cs_tpool_task *cur;
      unsigned idx;

      /* Sleep until there is something queued or we are asked to quit. */
      while (!pool->shutdown && list_empty(&pool->workqueue))
         cnd_wait(&pool->new_work, &pool->m);

      if (pool->shutdown)
         break;

      cur = list_first_entry(&pool->workqueue, struct lp_cs_tpool_task,
                             list);

      /* Claim the next iteration; the worker that claims the last one
       * also takes the task off the queue.
       */
      idx = cur->iter_start;
      cur->iter_start = idx + 1;
      if (cur->iter_start == cur->iter_total)
         list_del(&cur->list);

      /* Run the callback without holding the pool mutex. */
      mtx_unlock(&pool->m);
      cur->work(cur->data, idx, &lmem);
      mtx_lock(&pool->m);

      /* The task must not be touched after the broadcast: the waiter
       * frees it once every iteration is reported finished.
       */
      cur->iter_finished += 1;
      if (cur->iter_finished == cur->iter_total)
         cnd_broadcast(&cur->finish);
   }
   mtx_unlock(&pool->m);

   free(lmem.local_mem_ptr);
   return 0;
}
/**
 * Create a compute thread pool and start its worker threads.
 *
 * \param num_threads  number of workers to start; values above
 *                     LP_MAX_THREADS are clamped so the fixed-size
 *                     threads[] array can never be overrun, even when
 *                     assertions are compiled out.
 * \return the new pool, or NULL if the pool struct could not be allocated.
 *         Release it with lp_cs_tpool_destroy().
 */
struct lp_cs_tpool *
lp_cs_tpool_create(unsigned num_threads)
{
   struct lp_cs_tpool *pool = CALLOC_STRUCT(lp_cs_tpool);

   if (!pool)
      return NULL;

   (void) mtx_init(&pool->m, mtx_plain);
   cnd_init(&pool->new_work);
   list_inithead(&pool->workqueue);

   /* Still flag the programming error in debug builds, but never write
    * past the end of pool->threads[] in release builds.
    */
   assert (num_threads <= LP_MAX_THREADS);
   if (num_threads > LP_MAX_THREADS)
      num_threads = LP_MAX_THREADS;

   pool->num_threads = num_threads;
   /* NOTE(review): the result of u_thread_create() is not checked here;
    * confirm how a failed creation is reported before joining in destroy.
    */
   for (unsigned i = 0; i < num_threads; i++)
      pool->threads[i] = u_thread_create(lp_cs_tpool_worker, pool);

   return pool;
}
/**
 * Stop all worker threads and free the pool.
 *
 * Sets the shutdown flag under the pool mutex, wakes every worker waiting
 * for new work, joins all of them and then releases the pool.  Workers
 * leave their loop as soon as they observe the flag, so tasks that are
 * still queued at this point are neither completed nor freed here.
 *
 * \param pool  pool to destroy; NULL is accepted and ignored.
 */
void
lp_cs_tpool_destroy(struct lp_cs_tpool *pool)
{
   if (!pool)
      return;

   mtx_lock(&pool->m);
   pool->shutdown = true;
   cnd_broadcast(&pool->new_work);
   mtx_unlock(&pool->m);

   for (unsigned i = 0; i < pool->num_threads; i++) {
      thrd_join(pool->threads[i], NULL);
   }

   cnd_destroy(&pool->new_work);
   mtx_destroy(&pool->m);
   /* The pool comes from CALLOC_STRUCT(), so release it with the matching
    * FREE() wrapper rather than raw free().
    */
   FREE(pool);
}
/**
 * Queue a task whose work function must run num_iters times.
 *
 * The work function is called once for every index in [0, num_iters).
 * On a pool with worker threads the task is appended to the work queue
 * and all waiting workers are woken so the iterations can be spread over
 * them.  Two cases are handled without touching the queue:
 *  - num_iters <= 0: there is nothing to run, so the task is returned
 *    already complete (queuing it would leave an entry the workers can
 *    never retire);
 *  - a pool with zero threads: the iterations run synchronously on the
 *    calling thread, since no worker would ever pick them up.
 *
 * \param pool       pool to run on; must not be NULL
 * \param work       callback invoked per iteration
 * \param data       opaque pointer handed to every callback invocation
 * \param num_iters  number of iterations to run
 * \return a task handle that must be passed to
 *         lp_cs_tpool_wait_for_task(), which also frees it; NULL if the
 *         handle could not be allocated.
 */
struct lp_cs_tpool_task *
lp_cs_tpool_queue_task(struct lp_cs_tpool *pool,
                       lp_cs_tpool_task_func work, void *data, int num_iters)
{
   struct lp_cs_tpool_task *task;
   const unsigned iter_total = num_iters > 0 ? (unsigned)num_iters : 0;

   task = CALLOC_STRUCT(lp_cs_tpool_task);
   if (!task) {
      return NULL;
   }

   task->work = work;
   task->data = data;
   task->iter_total = iter_total;
   /* Always initialised so the waiter can unconditionally destroy it. */
   cnd_init(&task->finish);

   /* Nothing to run: iter_finished (0) already equals iter_total (0). */
   if (iter_total == 0)
      return task;

   /* No workers exist, so run everything here and hand back a task that
    * is already complete; the waiter then returns immediately.
    */
   if (pool->num_threads == 0) {
      struct lp_cs_local_mem lmem;

      memset(&lmem, 0, sizeof(lmem));
      for (unsigned i = 0; i < iter_total; i++)
         work(data, (int)i, &lmem);
      free(lmem.local_mem_ptr);

      task->iter_start = iter_total;
      task->iter_finished = iter_total;
      return task;
   }

   mtx_lock(&pool->m);
   list_addtail(&task->list, &pool->workqueue);
   /* Wake every idle worker, not just one: a single task carries many
    * iterations that can run in parallel.
    */
   cnd_broadcast(&pool->new_work);
   mtx_unlock(&pool->m);

   return task;
}
/**
 * Block until every iteration of a task has finished, then free the task.
 *
 * \param pool         pool the task was queued on
 * \param task_handle  address of the handle returned by
 *                     lp_cs_tpool_queue_task(); reset to NULL on return
 *                     once the task has been freed.
 *
 * Does nothing if pool, task_handle or *task_handle is NULL.
 */
void
lp_cs_tpool_wait_for_task(struct lp_cs_tpool *pool,
                          struct lp_cs_tpool_task **task_handle)
{
   struct lp_cs_tpool_task *task;

   /* Check the handle itself before dereferencing it. */
   if (!pool || !task_handle)
      return;

   task = *task_handle;
   if (!task)
      return;

   /* iter_finished is updated by the workers under pool->m. */
   mtx_lock(&pool->m);
   while (task->iter_finished < task->iter_total)
      cnd_wait(&task->finish, &pool->m);
   mtx_unlock(&pool->m);

   cnd_destroy(&task->finish);
   /* The task comes from CALLOC_STRUCT(), so use the matching FREE(). */
   FREE(task);
   *task_handle = NULL;
}

View File

@ -0,0 +1,81 @@
/**************************************************************************
*
* Copyright 2019 Red Hat.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**************************************************************************/
/* This is a compute shader specific thread pool.
* It allows a number of iterations to be queued per work item.
* The item is added to the work queue once, but its work function is
* executed once per iteration, each call receiving a unique sequential
* index. This saves storing a bunch of queue structs with just unique
* indexes in them.
* Each worker thread also passes its own local memory struct to the
* work function, from outside the thread exec function.
*/
#ifndef LP_CS_QUEUE
#define LP_CS_QUEUE
#include "pipe/p_compiler.h"
#include "util/u_thread.h"
#include "util/list.h"
#include "lp_limits.h"
/**
 * Compute shader thread pool: a set of worker threads sharing one queue
 * of multi-iteration tasks.
 */
struct lp_cs_tpool {
/* Protects workqueue, shutdown and the iteration counters of queued tasks. */
mtx_t m;
/* Signalled when a task is queued and broadcast on shutdown. */
cnd_t new_work;
/* Worker thread handles; only the first num_threads entries are used. */
thrd_t threads[LP_MAX_THREADS];
/* Number of worker threads started at creation time. */
unsigned num_threads;
/* Tasks that still have iterations waiting to be started. */
struct list_head workqueue;
/* Set by lp_cs_tpool_destroy() to make the workers exit. */
bool shutdown;
};
/**
 * Per-worker scratch state handed to every work function call.
 * Each worker zero-initialises one instance when it starts and calls
 * free() on local_mem_ptr when it exits.
 */
struct lp_cs_local_mem {
/* presumably the size in bytes of the buffer at local_mem_ptr -- TODO confirm */
unsigned local_size;
/* Buffer pointer, NULL initially; freed with free() by the owning worker. */
void *local_mem_ptr;
};
/**
 * Work function run once per task iteration.
 *
 * \param data      the opaque pointer supplied when the task was queued
 * \param iter_idx  index of this iteration, unique within the task
 * \param lmem      scratch struct owned by the calling worker thread
 */
typedef void (*lp_cs_tpool_task_func)(void *data, int iter_idx, struct lp_cs_local_mem *lmem);
/**
 * One queued unit of work: a callback plus the bookkeeping needed to run
 * it iter_total times.  The counters are updated by the workers while
 * holding the pool mutex.
 */
struct lp_cs_tpool_task {
/* Callback to run for each iteration. */
lp_cs_tpool_task_func work;
/* Opaque pointer passed to every call of work. */
void *data;
/* Link in the pool's workqueue. */
struct list_head list;
/* Broadcast by the worker that finishes the last iteration. */
cnd_t finish;
/* Total number of iterations to run. */
unsigned iter_total;
/* Index of the next iteration to hand out (count of iterations started). */
unsigned iter_start;
/* Number of iterations whose work call has returned. */
unsigned iter_finished;
};
/* Create a pool and start num_threads workers; returns NULL on allocation failure. */
struct lp_cs_tpool *lp_cs_tpool_create(unsigned num_threads);
/* Shut down and join all workers, then free the pool; NULL is ignored. */
void lp_cs_tpool_destroy(struct lp_cs_tpool *);
/* Queue func to be run num_iters times with data; returns a task handle to
 * pass to lp_cs_tpool_wait_for_task(), or NULL on allocation failure.
 */
struct lp_cs_tpool_task *lp_cs_tpool_queue_task(struct lp_cs_tpool *,
lp_cs_tpool_task_func func,
void *data, int num_iters);
/* Block until all iterations of *task have finished, free the task and set
 * *task to NULL.
 */
void lp_cs_tpool_wait_for_task(struct lp_cs_tpool *pool,
struct lp_cs_tpool_task **task);
#endif /* LP_CS_QUEUE */

View File

@ -49,6 +49,7 @@
#include "lp_public.h"
#include "lp_limits.h"
#include "lp_rast.h"
#include "lp_cs_tpool.h"
#include "state_tracker/sw_winsys.h"
@ -595,6 +596,9 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
if (screen->cs_tpool)
lp_cs_tpool_destroy(screen->cs_tpool);
if (screen->rast)
lp_rast_destroy(screen->rast);
@ -604,7 +608,7 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
winsys->destroy(winsys);
mtx_destroy(&screen->rast_mutex);
mtx_destroy(&screen->cs_mutex);
FREE(screen);
}
@ -717,5 +721,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
}
(void) mtx_init(&screen->rast_mutex, mtx_plain);
screen->cs_tpool = lp_cs_tpool_create(screen->num_threads);
if (!screen->cs_tpool) {
lp_rast_destroy(screen->rast);
lp_jit_screen_cleanup(screen);
FREE(screen);
return NULL;
}
(void) mtx_init(&screen->cs_mutex, mtx_plain);
return &screen->base;
}

View File

@ -41,7 +41,7 @@
struct sw_winsys;
struct lp_cs_tpool;
struct llvmpipe_screen
{
@ -57,6 +57,9 @@ struct llvmpipe_screen
struct lp_rasterizer *rast;
mtx_t rast_mutex;
struct lp_cs_tpool *cs_tpool;
mtx_t cs_mutex;
};

View File

@ -33,6 +33,8 @@ files_llvmpipe = files(
'lp_clear.h',
'lp_context.c',
'lp_context.h',
'lp_cs_tpool.h',
'lp_cs_tpool.c',
'lp_debug.h',
'lp_draw_arrays.c',
'lp_fence.c',