st/nine: Optimize EndScene

So far we did nothing on EndScene, but the API
doc says it flushes the GPU command queue.
The doc implies one can optimize CPU usage
by calling EndScene long before Present() is called.

Implementing the flush behaviour gives me +15-20%
on the CPU-limited Halo. On the other hand, limit
the flush to only once per frame:
3DMark03/3DMark05 get a 2% perf hit with the patch,
but 5% if I allow more flushes.

Signed-off-by: Axel Davy <davyaxel0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9451>
This commit is contained in:
Axel Davy 2021-03-05 18:23:08 +01:00 committed by Marge Bot
parent 2497195aab
commit 634adfa253
5 changed files with 39 additions and 0 deletions

View File

@ -2035,6 +2035,19 @@ NineDevice9_EndScene( struct NineDevice9 *This )
DBG("This=%p\n", This);
user_assert(This->in_scene, D3DERR_INVALIDCALL);
This->in_scene = FALSE;
This->end_scene_since_present++;
/* EndScene() is supposed to flush the GPU commands.
* The idea is to flush ahead of the Present() call.
* (Apps could take advantage of this by inserting CPU
* work between EndScene() and Present()).
* Most apps will have one EndScene per frame.
* Some will have 2 or 3.
* Some bad behaving apps do a lot of them.
* As flushing has a cost, do it only once. */
if (This->end_scene_since_present <= 1) {
nine_context_pipe_flush(This);
nine_csmt_flush(This);
}
return D3D_OK;
}

View File

@ -90,6 +90,7 @@ struct NineDevice9
boolean is_recording;
boolean in_scene;
unsigned end_scene_since_present;
uint16_t vs_const_size;
uint16_t ps_const_size;

View File

@ -211,6 +211,16 @@ nine_csmt_process( struct NineDevice9 *device )
nine_csmt_wait_processed(ctx);
}
/* Flushes the CSMT instruction pool of the device.
 * Nothing is done when CSMT is not active for this device. */
void
nine_csmt_flush( struct NineDevice9* device )
{
    if (device->csmt_active) {
        nine_queue_flush(device->csmt_ctx->pool);
    }
}
/* Destroys a CSMT context.
* Waits for the worker thread to terminate.
*/
@ -2648,6 +2658,13 @@ nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *que
return ret;
}
/* Flushes the device's pipe context.
 * No fence is requested (NULL) and PIPE_FLUSH_ASYNC is passed as flag.
 * NOTE(review): CSMT_ITEM_NO_WAIT presumably lets the caller continue
 * without waiting for this body to run - confirm against the macro. */
CSMT_ITEM_NO_WAIT(nine_context_pipe_flush)
{
    device->context.pipe->flush(device->context.pipe, NULL, PIPE_FLUSH_ASYNC);
}
/* State defaults */
static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =

View File

@ -604,6 +604,9 @@ nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *que
unsigned *counter, boolean flush, boolean wait,
union pipe_query_result *result);
/* Flushes the device's pipe context with PIPE_FLUSH_ASYNC,
 * without requesting a fence. */
void
nine_context_pipe_flush(struct NineDevice9 *device);
void nine_state_restore_non_cso(struct NineDevice9 *device);
void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *,
boolean is_reset);
@ -648,9 +651,13 @@ nine_csmt_create( struct NineDevice9 *This );
void
nine_csmt_destroy( struct NineDevice9 *This, struct csmt_context *ctx );
/* Flushes and waits until everything is executed */
void
nine_csmt_process( struct NineDevice9 *This );
/* Flushes and doesn't wait.
 * Does nothing when CSMT is not active. */
void
nine_csmt_flush( struct NineDevice9 *This );
/* Get the pipe_context (should not be called from the worker thread).
* All the work in the worker thread is finished before returning. */

View File

@ -930,6 +930,7 @@ bypass_rendering:
if (FAILED(hr)) { UNTESTED(3);return hr; }
}
This->base.device->end_scene_since_present = 0;
return D3D_OK;
}