radv: Use atomics to read query results.

The volatile pattern gives me flaky results for 32-bit builds on
ChromeOS Android. This is because on 32-bit the volatile 64-bit
loads gets split into 2 32-bit loads each.

So if we read the lower dword first and then the upper dword, it
can happen that the upper dword is already changed but the lower
dword isn't yet. In particular for occlusion queries this gives
false readings, as the upper dword commonly only constains the
ready bit.

With the GCC atomic intrinsics we get a call to __atomic_load_8
in libatomic.so which does the right thing.

An alternative fix would be to  explicitly split the 32-bit loads
in the right order and do a bunch of retries if things change, though
that gets messy quickly and for 32-bit builds only doesn't feel worth
it that much.

CC: mesa-stable
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6933>
This commit is contained in:
Bas Nieuwenhuizen 2020-09-30 13:07:43 +02:00 committed by Marge Bot
parent 7cc55df938
commit 7568c97df1
1 changed files with 22 additions and 20 deletions

View File

@ -1247,31 +1247,32 @@ VkResult radv_GetQueryPoolResults(
switch (pool->type) {
case VK_QUERY_TYPE_TIMESTAMP: {
volatile uint64_t const *src64 = (volatile uint64_t const *)src;
available = *src64 != TIMESTAMP_NOT_READY;
uint64_t const *src64 = (uint64_t const *)src;
uint64_t value;
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
while (*src64 == TIMESTAMP_NOT_READY)
;
available = true;
}
do {
value = p_atomic_read(src64);
} while (value == TIMESTAMP_NOT_READY &&
(flags & VK_QUERY_RESULT_WAIT_BIT));
available = value != TIMESTAMP_NOT_READY;
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
if (flags & VK_QUERY_RESULT_64_BIT) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
*(uint64_t*)dest = *src64;
*(uint64_t*)dest = value;
dest += 8;
} else {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
*(uint32_t*)dest = *(volatile uint32_t*)src;
*(uint32_t*)dest = (uint32_t)value;
dest += 4;
}
break;
}
case VK_QUERY_TYPE_OCCLUSION: {
volatile uint64_t const *src64 = (volatile uint64_t const *)src;
uint64_t const *src64 = (uint64_t const *)src;
uint32_t db_count = device->physical_device->rad_info.num_render_backends;
uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
uint64_t sample_count = 0;
@ -1284,8 +1285,8 @@ VkResult radv_GetQueryPoolResults(
continue;
do {
start = src64[2 * i];
end = src64[2 * i + 1];
start = p_atomic_read(src64 + 2 * i);
end = p_atomic_read(src64 + 2 * i + 1);
} while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
@ -1310,16 +1311,17 @@ VkResult radv_GetQueryPoolResults(
break;
}
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
if (flags & VK_QUERY_RESULT_WAIT_BIT)
while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
;
available = *(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
const uint32_t *avail_ptr = (const uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
do {
available = p_atomic_read(avail_ptr);
} while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
const volatile uint64_t *start = (uint64_t*)src;
const volatile uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
const uint64_t *start = (uint64_t*)src;
const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
if (flags & VK_QUERY_RESULT_64_BIT) {
uint64_t *dst = (uint64_t*)dest;
dest += util_bitcount(pool->pipeline_stats_mask) * 8;
@ -1347,7 +1349,7 @@ VkResult radv_GetQueryPoolResults(
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
volatile uint64_t const *src64 = (volatile uint64_t const *)src;
uint64_t const *src64 = (uint64_t const *)src;
uint64_t num_primitives_written;
uint64_t primitive_storage_needed;
@ -1359,7 +1361,7 @@ VkResult radv_GetQueryPoolResults(
*/
available = 1;
for (int j = 0; j < 4; j++) {
if (!(src64[j] & 0x8000000000000000UL))
if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
available = 0;
}