From 080b05e29e1f04f22a776057631f4061cf7c1824 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 21 Apr 2021 01:06:41 +0530 Subject: [PATCH] asahi: Add Gallium driver Forked from noop, with some code from Panfrost. Signed-off-by: Alyssa Rosenzweig Acked-by: Jason Ekstrand Acked-by: Bas Nieuwenhuizen Part-of: --- meson.build | 1 + meson_options.txt | 2 +- .../target-helpers/inline_sw_helper.h | 12 + .../auxiliary/target-helpers/sw_helper.h | 12 + src/gallium/drivers/asahi/agx_pipe.c | 963 ++++++++++++++ src/gallium/drivers/asahi/agx_public.h | 38 + src/gallium/drivers/asahi/agx_state.c | 1181 +++++++++++++++++ src/gallium/drivers/asahi/agx_state.h | 229 ++++ src/gallium/drivers/asahi/meson.build | 16 + src/gallium/meson.build | 5 + src/gallium/targets/dri/meson.build | 6 +- src/gallium/targets/libgl-xlib/meson.build | 4 +- src/meson.build | 2 +- 13 files changed, 2465 insertions(+), 6 deletions(-) create mode 100644 src/gallium/drivers/asahi/agx_pipe.c create mode 100644 src/gallium/drivers/asahi/agx_public.h create mode 100644 src/gallium/drivers/asahi/agx_state.c create mode 100644 src/gallium/drivers/asahi/agx_state.h diff --git a/meson.build b/meson.build index 19a4a9d4dcb..0c3d0c762b8 100644 --- a/meson.build +++ b/meson.build @@ -236,6 +236,7 @@ with_gallium_swr = gallium_drivers.contains('swr') with_gallium_lima = gallium_drivers.contains('lima') with_gallium_zink = gallium_drivers.contains('zink') with_gallium_d3d12 = gallium_drivers.contains('d3d12') +with_gallium_asahi = gallium_drivers.contains('asahi') with_gallium = gallium_drivers.length() != 0 with_gallium_kmsro = with_gallium_v3d or with_gallium_vc4 or with_gallium_etnaviv or with_gallium_panfrost or with_gallium_lima or with_gallium_freedreno diff --git a/meson_options.txt b/meson_options.txt index d725761df70..496091852c5 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -67,7 +67,7 @@ option( choices : [ 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', 'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl', - 'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12' + 'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi' ], description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 76eda8467b8..8271df0cb0b 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -33,6 +33,10 @@ #include "d3d12/d3d12_public.h" #endif +#ifdef GALLIUM_ASAHI +#include "asahi/agx_public.h" +#endif + static inline struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { @@ -71,6 +75,11 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = d3d12_create_dxcore_screen(winsys, NULL); #endif +#if defined(GALLIUM_ASAHI) + if (screen == NULL && strcmp(driver, "asahi") == 0) + screen = agx_screen_create(winsys); +#endif + return screen ? debug_screen_wrap(screen) : NULL; } @@ -84,6 +93,9 @@ sw_screen_create(struct sw_winsys *winsys) #if defined(GALLIUM_D3D12) only_sw ? "" : "d3d12", #endif +#if defined(GALLIUM_ASAHI) + only_sw ? 
"" : "asahi", +#endif #if defined(GALLIUM_LLVMPIPE) "llvmpipe", #endif diff --git a/src/gallium/auxiliary/target-helpers/sw_helper.h b/src/gallium/auxiliary/target-helpers/sw_helper.h index 88a5086d261..343c0410558 100644 --- a/src/gallium/auxiliary/target-helpers/sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/sw_helper.h @@ -21,6 +21,10 @@ #include "d3d12/d3d12_public.h" #endif +#ifdef GALLIUM_ASAHI +#include "asahi/agx_public.h" +#endif + #ifdef GALLIUM_SOFTPIPE #include "softpipe/sp_public.h" #endif @@ -76,6 +80,11 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = d3d12_create_dxcore_screen(winsys, NULL); #endif +#if defined(GALLIUM_ASAHI) + if (screen == NULL && strcmp(driver, "asahi") == 0) + screen = agx_screen_create(winsys); +#endif + return screen; } @@ -89,6 +98,9 @@ sw_screen_create(struct sw_winsys *winsys) #if defined(GALLIUM_D3D12) only_sw ? "" : "d3d12", #endif +#if defined(GALLIUM_ASAHI) + only_sw ? "" : "asahi", +#endif #if defined(GALLIUM_LLVMPIPE) "llvmpipe", #endif diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c new file mode 100644 index 00000000000..a19eb7d54f1 --- /dev/null +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -0,0 +1,963 @@ +/* + * Copyright 2010 Red Hat Inc. + * Copyright 2006 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_screen.h" +#include "util/u_inlines.h" +#include "util/format/u_format.h" +#include "util/u_upload_mgr.h" +#include "util/half_float.h" +#include "frontend/winsys_handle.h" +#include "frontend/sw_winsys.h" +#include "gallium/auxiliary/util/u_transfer.h" +#include "gallium/auxiliary/util/u_surface.h" +#include "agx_public.h" +#include "agx_state.h" +#include "magic.h" +#include "asahi/compiler/agx_compile.h" +#include "asahi/lib/decode.h" +#include "asahi/lib/tiling.h" + +static const struct debug_named_value agx_debug_options[] = { + {"trace", AGX_DBG_TRACE, "Trace the command stream"}, + DEBUG_NAMED_VALUE_END +}; + +void agx_init_state_functions(struct pipe_context *ctx); + +static struct pipe_query * +agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct agx_query *query = CALLOC_STRUCT(agx_query); + + return (struct pipe_query *)query; +} + +static void +agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + FREE(query); +} + +static bool +agx_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + bool wait, + union pipe_query_result *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + *result = 0; + return true; +} + +static void +agx_set_active_query_state(struct pipe_context *pipe, bool enable) +{ +} + + +/* + * resource + */ + +static struct pipe_resource * +agx_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + struct winsys_handle *whandle, + unsigned usage) +{ + unreachable("Imports todo"); +} + +static bool +agx_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_resource *pt, + struct winsys_handle *handle, + unsigned usage) +{ + unreachable("Handles todo"); +} + + +static struct pipe_resource * +agx_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct agx_device *dev = agx_device(screen); + struct agx_resource *nresource; + unsigned stride; + uint64_t modifier = DRM_FORMAT_MOD_LINEAR; + + if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) + modifier = DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER; + + nresource = CALLOC_STRUCT(agx_resource); + if (!nresource) + return NULL; + + stride = util_format_get_stride(templ->format, templ->width0); + nresource->base = *templ; + nresource->base.screen = screen; + nresource->modifier = modifier; + nresource->slices[0].line_stride = stride; + + unsigned size = 4 * ALIGN_POT(templ->width0, 64) * ALIGN_POT(templ->height0, 64) * templ->depth0; + nresource->bo = agx_bo_create(dev, size, AGX_MEMORY_TYPE_FRAMEBUFFER); + + if (!nresource->bo) { + FREE(nresource); + return NULL; + } + + pipe_reference_init(&nresource->base.reference, 1); + + struct sw_winsys *winsys = ((struct agx_screen *) screen)->winsys; + + if (templ->bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + nresource->dt = winsys->displaytarget_create(winsys, + templ->bind, + templ->format, + templ->width0, + templ->height0, + 64, + NULL /*map_front_private*/, + &nresource->dt_stride); + + if (nresource->dt == NULL) { + agx_bo_unreference(nresource->bo); + 
FREE(nresource); + return NULL; + } + } + + return &nresource->base; +} + +static void +agx_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *prsrc) +{ + struct agx_resource *rsrc = (struct agx_resource *)prsrc; + + if (rsrc->dt) { + /* display target */ + struct agx_screen *agx_screen = (struct agx_screen*)screen; + struct sw_winsys *winsys = agx_screen->winsys; + winsys->displaytarget_destroy(winsys, rsrc->dt); + } + + agx_bo_unreference(rsrc->bo); + FREE(rsrc); +} + + +/* + * transfer + */ + +static void +agx_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ +} + +static void * +agx_transfer_map(struct pipe_context *pctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_MAP_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_resource *rsrc = agx_resource(resource); + unsigned bytes_per_pixel = util_format_get_blocksize(resource->format); + struct agx_bo *bo = rsrc->bo; + + /* Can't map tiled/compressed directly */ + if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR) + return NULL; + + if (ctx->batch->cbufs[0] && resource == ctx->batch->cbufs[0]->texture) + pctx->flush(pctx, NULL, 0); + + struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer); + transfer->base.level = level; + transfer->base.usage = usage; + transfer->base.box = *box; + + pipe_resource_reference(&transfer->base.resource, resource); + *out_transfer = &transfer->base; + + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + transfer->base.stride = box->width * bytes_per_pixel; + transfer->base.layer_stride = transfer->base.stride * box->height; + transfer->map = calloc(transfer->base.layer_stride, box->depth); + assert(box->depth == 1); + + if ((usage & PIPE_MAP_READ) && rsrc->slices[level].data_valid) { + agx_detile( + ((uint8_t *) bo->ptr.cpu) + rsrc->slices[level].offset, + transfer->map, + u_minify(resource->width0, level), bytes_per_pixel * 8, + transfer->base.stride / bytes_per_pixel, + box->x, box->y, box->x + box->width, box->y + box->height); + } + + return transfer->map; + } else { + assert (rsrc->modifier == DRM_FORMAT_MOD_LINEAR); + + transfer->base.stride = rsrc->slices[level].line_stride; + transfer->base.layer_stride = 0; // TODO + + /* Be conservative for direct writes */ + + if ((usage & PIPE_MAP_WRITE) && (usage & PIPE_MAP_DIRECTLY)) + rsrc->slices[level].data_valid = true; + + return ((uint8_t *) bo->ptr.cpu) + + rsrc->slices[level].offset + + transfer->base.box.z * transfer->base.layer_stride + + transfer->base.box.y * rsrc->slices[level].line_stride + + transfer->base.box.x * bytes_per_pixel; + } +} + +static void +agx_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + /* Gallium expects writeback here, so we tile */ + + struct agx_transfer *trans = agx_transfer(transfer); + struct pipe_resource *prsrc = transfer->resource; + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + unsigned bytes_per_pixel = util_format_get_blocksize(prsrc->format); + + if (transfer->usage & PIPE_MAP_WRITE) + rsrc->slices[transfer->level].data_valid = true; + + /* Tiling will occur in software from a staging cpu buffer */ + if ((transfer->usage & PIPE_MAP_WRITE) && + rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + struct agx_bo *bo = rsrc->bo; + assert(trans->map != NULL); + assert(transfer->box.depth == 1); + + 
agx_tile( + ((uint8_t *) bo->ptr.cpu) + rsrc->slices[transfer->level].offset, + trans->map, + u_minify(transfer->resource->width0, transfer->level), + bytes_per_pixel * 8, + transfer->stride / bytes_per_pixel, + transfer->box.x, transfer->box.y, + transfer->box.x + transfer->box.width, + transfer->box.y + transfer->box.height); + } + + /* Free the transfer */ + free(trans->map); + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + +/* + * clear/copy + */ +static void +agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->batch->clear |= buffers; + memcpy(ctx->batch->clear_color, color->f, sizeof(color->f)); +} + +static void +agx_blit(struct pipe_context *ctx, + const struct pipe_blit_info *info) +{ +} + + +static void +agx_flush_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +/* + * context + */ +static void +agx_flush(struct pipe_context *pctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct agx_context *ctx = agx_context(pctx); + + if (fence) + *fence = NULL; + + /* TODO */ + if (!ctx->batch->cbufs[0]) + return; + + /* Nothing to do */ + if (!(ctx->batch->draw | ctx->batch->clear)) + return; + + /* Finalize the encoder */ + uint8_t stop[] = { + 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, // Stop + }; + + memcpy(ctx->batch->encoder_current, stop, sizeof(stop)); + + /* Emit the commandbuffer */ + + uint16_t clear_colour[4] = { + _mesa_float_to_half(ctx->batch->clear_color[0]), + _mesa_float_to_half(ctx->batch->clear_color[1]), + _mesa_float_to_half(ctx->batch->clear_color[2]), + _mesa_float_to_half(ctx->batch->clear_color[3]) + }; + + struct agx_device *dev = agx_device(pctx->screen); + uint64_t pipeline_clear = + agx_build_clear_pipeline(ctx, + dev->internal.clear, + agx_pool_upload(&ctx->batch->pool, clear_colour, sizeof(clear_colour))); + + uint64_t pipeline_store = + agx_build_store_pipeline(ctx, + dev->internal.store, + agx_pool_upload(&ctx->batch->pool, ctx->render_target[0], sizeof(ctx->render_target))); + + /* Pipelines must 64 aligned */ + struct agx_ptr pipeline_null = + agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, 64, 64); + memset(pipeline_null.cpu, 0, 64); + + struct agx_resource *rt0 = agx_resource(ctx->batch->cbufs[0]->texture); + rt0->slices[0].data_valid = true; + + /* BO list for a given batch consists of: + * - BOs for the batch's framebuffer surfaces + * - BOs for the batch's pools + * - BOs for the encoder + * - BO for internal shaders + * - BOs added to the batch explicitly + */ + struct agx_batch *batch = ctx->batch; + + agx_batch_add_bo(batch, batch->encoder); + agx_batch_add_bo(batch, dev->internal.bo); + + for (unsigned i = 0; i < batch->nr_cbufs; ++i) { + struct pipe_surface *surf = batch->cbufs[i]; + assert(surf != NULL && surf->texture != NULL); + struct agx_resource *rsrc = agx_resource(surf->texture); + agx_batch_add_bo(batch, rsrc->bo); + } + + if (batch->zsbuf) + unreachable("todo: zsbuf"); + + unsigned handle_count = + BITSET_COUNT(batch->bo_list) + + agx_pool_num_bos(&batch->pool) + + agx_pool_num_bos(&batch->pipeline_pool); + + uint32_t *handles = calloc(sizeof(uint32_t), handle_count); + unsigned handle = 0, handle_i = 0; + + BITSET_FOREACH_SET(handle, batch->bo_list, sizeof(batch->bo_list) * 8) { + handles[handle_i++] = handle; + } + + agx_pool_get_bo_handles(&batch->pool, handles + 
handle_i); + handle_i += agx_pool_num_bos(&batch->pool); + + agx_pool_get_bo_handles(&batch->pipeline_pool, handles + handle_i); + handle_i += agx_pool_num_bos(&batch->pipeline_pool); + + /* Size calculation should've been exact */ + assert(handle_i == handle_count); + + /* Generate the mapping table from the BO list */ + demo_mem_map(dev->memmap.ptr.cpu, dev->memmap.size, handles, handle_count, + 0xDEADBEEF, 0xCAFECAFE); + + free(handles); + + demo_cmdbuf(dev->cmdbuf.ptr.cpu, + dev->cmdbuf.size, + &ctx->batch->pool, + ctx->batch->encoder->ptr.gpu, + ctx->batch->width, + ctx->batch->height, + pipeline_null.gpu, + pipeline_clear, + pipeline_store, + rt0->bo->ptr.gpu); + + agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id); + + agx_wait_queue(dev->queue); + + if (dev->debug & AGX_DBG_TRACE) { + agxdecode_cmdstream(dev->cmdbuf.handle, dev->memmap.handle, true); + agxdecode_next_frame(); + } + + memset(batch->bo_list, 0, sizeof(batch->bo_list)); + agx_pool_cleanup(&ctx->batch->pool); + agx_pool_cleanup(&ctx->batch->pipeline_pool); + agx_pool_init(&ctx->batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true); + ctx->batch->clear = 0; + ctx->batch->draw = 0; + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; +} + +static void +agx_destroy_context(struct pipe_context *ctx) +{ + if (ctx->stream_uploader) + u_upload_destroy(ctx->stream_uploader); + + FREE(ctx); +} + +static void +agx_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +static struct pipe_context * +agx_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) +{ + struct agx_context *ctx = CALLOC_STRUCT(agx_context); + struct pipe_context *pctx = &ctx->base; + + if (!ctx) + return NULL; + + pctx->screen = screen; + pctx->priv = priv; + + ctx->batch = CALLOC_STRUCT(agx_batch); + agx_pool_init(&ctx->batch->pool, + agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, + agx_device(screen), AGX_MEMORY_TYPE_SHADER, true); + ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; + + /* Upload fixed shaders (TODO: compile them?) 
*/ + + pctx->stream_uploader = u_upload_create_default(pctx); + if (!pctx->stream_uploader) { + FREE(pctx); + return NULL; + } + pctx->const_uploader = pctx->stream_uploader; + + pctx->destroy = agx_destroy_context; + pctx->flush = agx_flush; + pctx->clear = agx_clear; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = agx_blit; + pctx->flush_resource = agx_flush_resource; + pctx->create_query = agx_create_query; + pctx->destroy_query = agx_destroy_query; + pctx->begin_query = agx_begin_query; + pctx->end_query = agx_end_query; + pctx->get_query_result = agx_get_query_result; + pctx->set_active_query_state = agx_set_active_query_state; + pctx->transfer_map = agx_transfer_map; + pctx->transfer_flush_region = agx_transfer_flush_region; + pctx->transfer_unmap = agx_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->invalidate_resource = agx_invalidate_resource; + agx_init_state_functions(pctx); + + return pctx; +} + +static void +agx_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_context *pctx, + struct pipe_resource *prsrc, + unsigned level, unsigned layer, + void *context_private, struct pipe_box *box) +{ + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + struct agx_screen *agx_screen = (struct agx_screen*)_screen; + struct sw_winsys *winsys = agx_screen->winsys; + + /* Dump the framebuffer */ + assert (rsrc->dt); + void *map = winsys->displaytarget_map(winsys, rsrc->dt, PIPE_USAGE_DEFAULT); + assert(map != NULL); + + agx_detile(rsrc->bo->ptr.cpu, map, + rsrc->base.width0, 32, rsrc->dt_stride / 4, + 0, 0, rsrc->base.width0, rsrc->base.height0); + + winsys->displaytarget_display(winsys, rsrc->dt, context_private, box); +} + +static const char * +agx_get_vendor(struct pipe_screen* pscreen) +{ + return "Asahi"; +} + +static const char * +agx_get_device_vendor(struct pipe_screen* pscreen) +{ + return "Apple"; +} + +static const char * +agx_get_name(struct pipe_screen* pscreen) +{ + return "Apple M1 (G13G B0)"; +} + +static int +agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: + case PIPE_CAP_VERTEX_SHADER_SATURATE: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + return 1; + + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 0; + + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: + return true; + + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_ACCELERATED: + case PIPE_CAP_UMA: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED: + case PIPE_CAP_PACKED_UNIFORMS: + return 1; + + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + return 0; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + return 130; + case PIPE_CAP_ESSL_FEATURE_LEVEL: + return 120; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 
+ return 16; + + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 64; + + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + return 1; + + case PIPE_CAP_MAX_TEXTURE_2D_SIZE: + return 16384; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_TEXCOORD: + return 1; + + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return true; + + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 0xffff; + + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_MAX_VARYINGS: + return 16; + + case PIPE_CAP_FLATSHADE: + case PIPE_CAP_TWO_SIDED_COLOR: + case PIPE_CAP_ALPHA_TEST: + case PIPE_CAP_CLIP_PLANES: + case PIPE_CAP_PACKED_STREAM_OUTPUT: + case PIPE_CAP_NIR_IMAGES_AS_DEREF: + return 0; + + case PIPE_CAP_SHAREABLE_SHADERS: + return 1; + + default: + return u_pipe_screen_get_param_defaults(pscreen, param); + } +} + +static float +agx_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 1024.0; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; + + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; + + default: + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; + } +} + +static int +agx_get_shader_param(struct pipe_screen* pscreen, + enum pipe_shader_type shader, + enum pipe_shader_cap param) +{ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) + return 0; + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 1024; + + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; + + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 
1 : 16; + + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_INTEGERS: + case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: + case PIPE_SHADER_CAP_FP16_DERIVATIVES: + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: + case PIPE_SHADER_CAP_INT16: + return 1; + + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + return 0; + + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return 16; /* XXX: How many? */ + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED); + + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + return 0; + + default: + /* Other params are unknown */ + return 0; + } + + return 0; +} + +static int +agx_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + return 0; +} + +static bool +agx_is_format_supported(struct pipe_screen* pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned usage) +{ + const struct util_format_description *format_desc; + + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + + if (!format_desc) + return false; + + if (sample_count > 1) + return false; + + if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1)) + return false; + + /* TODO: formats */ + if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) + return (format == PIPE_FORMAT_B8G8R8A8_UNORM); + + /* TODO: formats */ + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return true; + default: + return false; + } + } + + /* TODO */ + return true; +} + +static uint64_t +agx_get_timestamp(struct pipe_screen *pscreen) +{ + return 0; +} + +static void +agx_destroy_screen(struct pipe_screen *screen) +{ + agx_close_device(agx_device(screen)); + ralloc_free(screen); +} + +static void +agx_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct 
pipe_fence_handle *fence) +{ +} + +static bool +agx_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + return true; +} + +static const void * +agx_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + return &agx_nir_options; +} + +struct pipe_screen * +agx_screen_create(struct sw_winsys *winsys) +{ + struct agx_screen *agx_screen; + struct pipe_screen *screen; + + agx_screen = rzalloc(NULL, struct agx_screen); + if (!agx_screen) + return NULL; + + screen = &agx_screen->pscreen; + agx_screen->winsys = winsys; + + /* Set debug before opening */ + agx_screen->dev.debug = + debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0); + + /* Try to open an AGX device */ + if (!agx_open_device(screen, &agx_screen->dev)) { + ralloc_free(agx_screen); + return NULL; + } + + screen->destroy = agx_destroy_screen; + screen->get_name = agx_get_name; + screen->get_vendor = agx_get_vendor; + screen->get_device_vendor = agx_get_device_vendor; + screen->get_param = agx_get_param; + screen->get_shader_param = agx_get_shader_param; + screen->get_compute_param = agx_get_compute_param; + screen->get_paramf = agx_get_paramf; + screen->is_format_supported = agx_is_format_supported; + screen->context_create = agx_create_context; + screen->resource_create = agx_resource_create; + screen->resource_from_handle = agx_resource_from_handle; + screen->resource_get_handle = agx_resource_get_handle; + screen->resource_destroy = agx_resource_destroy; + screen->flush_frontbuffer = agx_flush_frontbuffer; + screen->get_timestamp = agx_get_timestamp; + screen->fence_reference = agx_fence_reference; + screen->fence_finish = agx_fence_finish; + screen->get_compiler_options = agx_get_compiler_options; + + agx_internal_shaders(&agx_screen->dev); + + return screen; +} diff --git a/src/gallium/drivers/asahi/agx_public.h b/src/gallium/drivers/asahi/agx_public.h new file mode 100644 index 00000000000..4bf706286bd --- /dev/null +++ b/src/gallium/drivers/asahi/agx_public.h @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef AGX_PUBLIC_H +#define AGX_PUBLIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; +struct sw_winsys; +struct pipe_screen *agx_screen_create(struct sw_winsys *winsys); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c new file mode 100644 index 00000000000..16751f6b126 --- /dev/null +++ b/src/gallium/drivers/asahi/agx_state.c @@ -0,0 +1,1181 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "gallium/auxiliary/util/u_draw.h" +#include "gallium/auxiliary/util/u_helpers.h" +#include "gallium/auxiliary/util/u_viewport.h" +#include "gallium/auxiliary/tgsi/tgsi_from_mesa.h" +#include "compiler/nir/nir.h" +#include "asahi/compiler/agx_compile.h" +#include "agx_state.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_formats.h" + +static void +agx_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) +{ +} + +static void * +agx_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + return MALLOC(1); +} + +static void * +agx_create_zsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct agx_zsa *so = CALLOC_STRUCT(agx_zsa); + assert(!state->depth_bounds_test && "todo"); + + so->disable_z_write = !state->depth_writemask; + + /* Z func can be used as-is */ + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NEVER == AGX_ZS_FUNC_NEVER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LESS == AGX_ZS_FUNC_LESS); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_EQUAL == AGX_ZS_FUNC_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LEQUAL == AGX_ZS_FUNC_LEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GREATER == AGX_ZS_FUNC_GREATER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NOTEQUAL == AGX_ZS_FUNC_NOT_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GEQUAL == AGX_ZS_FUNC_GEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_ALWAYS == AGX_ZS_FUNC_ALWAYS); + + so->z_func = state->depth_enabled ? 
+ ((enum agx_zs_func) state->depth_func) : AGX_ZS_FUNC_ALWAYS; + + return so; +} + +static void +agx_bind_zsa_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + + if (cso) + memcpy(&ctx->zs, cso, sizeof(ctx->zs)); +} + +static void * +agx_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *cso) +{ + struct agx_rasterizer *so = CALLOC_STRUCT(agx_rasterizer); + so->base = *cso; + + agx_pack(so->cull, CULL, cfg) { + /* TODO: debug culling */ + cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT; + cfg.cull_back = cso->cull_face & PIPE_FACE_BACK; + cfg.front_face_ccw = cso->front_ccw; + // cfg.depth_clip = cso->depth_clip_near; + cfg.depth_clamp = !cso->depth_clip_near; + }; + + return so; +} + +static void +agx_bind_rasterizer_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->rast = cso; +} + +static enum agx_wrap +agx_wrap_from_pipe(enum pipe_tex_wrap in) +{ + switch (in) { + case PIPE_TEX_WRAP_REPEAT: return AGX_WRAP_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return AGX_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return AGX_WRAP_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return AGX_WRAP_CLAMP_TO_BORDER; + default: unreachable("todo: more wrap modes"); + } +} + +static enum agx_mip_filter +agx_mip_filter_from_pipe(enum pipe_tex_mipfilter in) +{ + switch (in) { + case PIPE_TEX_MIPFILTER_NEAREST: return AGX_MIP_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return AGX_MIP_FILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return AGX_MIP_FILTER_NONE; + } + + unreachable("Invalid mip filter"); +} + +static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = { + [PIPE_FUNC_NEVER] = AGX_COMPARE_FUNC_NEVER, + [PIPE_FUNC_LESS] = AGX_COMPARE_FUNC_LESS, + [PIPE_FUNC_EQUAL] = AGX_COMPARE_FUNC_EQUAL, + [PIPE_FUNC_LEQUAL] = AGX_COMPARE_FUNC_LEQUAL, + [PIPE_FUNC_GREATER] = AGX_COMPARE_FUNC_GREATER, + [PIPE_FUNC_NOTEQUAL] = AGX_COMPARE_FUNC_NOT_EQUAL, + [PIPE_FUNC_GEQUAL] = AGX_COMPARE_FUNC_GEQUAL, + [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS, +}; + +static void * +agx_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_bo *bo = agx_bo_create(dev, AGX_SAMPLER_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + agx_pack(bo->ptr.cpu, SAMPLER, cfg) { + cfg.magnify_linear = (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.minify_linear = (state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.mip_filter = agx_mip_filter_from_pipe(state->min_mip_filter); + cfg.wrap_s = agx_wrap_from_pipe(state->wrap_s); + cfg.wrap_t = agx_wrap_from_pipe(state->wrap_t); + cfg.wrap_r = agx_wrap_from_pipe(state->wrap_r); + cfg.pixel_coordinates = !state->normalized_coords; + cfg.compare_func = agx_compare_funcs[state->compare_func]; + } + + uint64_t *m = (uint64_t *) ((uint8_t *) bo->ptr.cpu + AGX_SAMPLER_LENGTH); + m[3] = 0x40; // XXX - what is this? maybe spurious? 
+ + return bo; +} + +static void +agx_delete_sampler_state(struct pipe_context *ctx, void *state) +{ + struct agx_bo *bo = state; + agx_bo_unreference(bo); +} + +static void +agx_bind_sampler_states(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + void **states) +{ + struct agx_context *ctx = agx_context(pctx); + + memcpy(&ctx->stage[shader].samplers[start], states, + sizeof(struct agx_bo *) * count); +} + +/* Channels agree for RGBA but are weird for force 0/1 */ + +static enum agx_channel +agx_channel_from_pipe(enum pipe_swizzle in) +{ + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_X == AGX_CHANNEL_R); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Y == AGX_CHANNEL_G); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Z == AGX_CHANNEL_B); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_W == AGX_CHANNEL_A); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_0 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_1 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_NONE & 0x4); + + if ((in & 0x4) == 0) + return (enum agx_channel) in; + else if (in == PIPE_SWIZZLE_1) + return AGX_CHANNEL_1; + else + return AGX_CHANNEL_0; +} + +static struct pipe_sampler_view * +agx_create_sampler_view(struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_sampler_view *so = CALLOC_STRUCT(agx_sampler_view); + + if (!so) + return NULL; + + /* We prepare the descriptor at CSO create time */ + so->desc = agx_bo_create(dev, AGX_TEXTURE_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + const struct util_format_description *desc = + util_format_description(state->format); + + /* We only have a single swizzle for the user swizzle and the format fixup, + * so compose them now. 
*/ + uint8_t out_swizzle[4]; + uint8_t view_swizzle[4] = { + state->swizzle_r, state->swizzle_g, + state->swizzle_b, state->swizzle_a + }; + + util_format_compose_swizzles(desc->swizzle, view_swizzle, out_swizzle); + + /* Pack the descriptor into GPU memory */ + agx_pack(so->desc->ptr.cpu, TEXTURE, cfg) { + assert(state->format == PIPE_FORMAT_B8G8R8A8_UNORM); // TODO: format table + cfg.format = 0xa22; + cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]); + cfg.width = texture->width0; + cfg.height = texture->height0; + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + cfg.unk_1 = agx_resource(texture)->bo->ptr.gpu; + cfg.unk_2 = 0x20000; + } + + /* Initialize base object */ + so->base = *state; + so->base.texture = NULL; + pipe_resource_reference(&so->base.texture, texture); + pipe_reference_init(&so->base.reference, 1); + so->base.context = pctx; + return &so->base; +} + +static void +agx_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, + struct pipe_sampler_view **views) +{ + struct agx_context *ctx = agx_context(pctx); + unsigned new_nr = 0; + unsigned i; + + assert(start == 0); + + if (!views) + count = 0; + + for (i = 0; i < count; ++i) { + if (views[i]) + new_nr = i + 1; + + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], views[i]); + } + + for (; i < ctx->stage[shader].texture_count; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], NULL); + } + ctx->stage[shader].texture_count = new_nr; +} + +static void +agx_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *pview) +{ + struct agx_sampler_view *view = (struct agx_sampler_view *) pview; + pipe_resource_reference(&view->base.texture, NULL); + agx_bo_unreference(view->desc); + FREE(view); +} + +static struct pipe_surface * +agx_create_surface(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + if (!surface) + return NULL; + pipe_reference_init(&surface->reference, 1); + pipe_resource_reference(&surface->texture, texture); + surface->context = ctx; + surface->format = surf_tmpl->format; + surface->width = texture->width0; + surface->height = texture->height0; + surface->texture = texture; + surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->u.tex.level = surf_tmpl->u.tex.level; + + return surface; +} + +static void +agx_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void +agx_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void +agx_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +static void +agx_set_scissor_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *state) +{ +} + +static void +agx_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref state) +{ +} + +static void +agx_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vp) +{ + struct 
agx_context *ctx = agx_context(pctx); + + assert(start_slot == 0 && "no geometry shaders"); + assert(num_viewports == 1 && "no geometry shaders"); + + if (!vp) + return; + + float vp_minx = vp->translate[0] - fabsf(vp->scale[0]); + float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]); + float vp_miny = vp->translate[1] - fabsf(vp->scale[1]); + float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]); + + float near_z, far_z; + util_viewport_zmin_zmax(vp, false, &near_z, &far_z); + + agx_pack(ctx->viewport, VIEWPORT, cfg) { + cfg.min_tile_x = vp_minx / 32; + cfg.min_tile_y = vp_miny / 32; + cfg.max_tile_x = MAX2(ceilf(vp_maxx / 32.0), 1.0); + cfg.max_tile_y = MAX2(ceilf(vp_maxy / 32.0), 1.0); + cfg.clip_tile = true; + + cfg.translate_x = vp->translate[0]; + cfg.translate_y = vp->translate[1]; + cfg.scale_x = vp->scale[0]; + cfg.scale_y = vp->scale[1]; + cfg.near_z = near_z; + cfg.z_range = far_z - near_z; + }; +} + +/* A framebuffer state can be reused across batches, so it doesn't make sense + * to add surfaces to the BO list here. Instead we added them when flushing. + */ + +static void +agx_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *state) +{ + struct agx_context *ctx = agx_context(pctx); + + if (!state) + return; + + ctx->batch->width = state->width; + ctx->batch->height = state->height; + ctx->batch->nr_cbufs = state->nr_cbufs; + ctx->batch->cbufs[0] = state->cbufs[0]; + + for (unsigned i = 0; i < state->nr_cbufs; ++i) { + struct pipe_surface *surf = state->cbufs[i]; + struct agx_resource *tex = agx_resource(surf->texture); + agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) { + assert(surf->format == PIPE_FORMAT_B8G8R8A8_UNORM); // TODO: format table + cfg.format = 0xa22; + cfg.swizzle_r = AGX_CHANNEL_B; + cfg.swizzle_g = AGX_CHANNEL_G; + cfg.swizzle_b = AGX_CHANNEL_R; + cfg.swizzle_a = AGX_CHANNEL_A; + cfg.width = state->width; + cfg.height = state->height; + cfg.buffer = tex->bo->ptr.gpu; + cfg.unk_100 = 0x1000000; + }; + } +} + +/* Likewise constant buffers, textures, and samplers are handled in a common + * per-draw path, with dirty tracking to reduce the costs involved. 
+ */ + +static void +agx_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + bool take_ownership, + const struct pipe_constant_buffer *cb) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_stage *s = &ctx->stage[shader]; + + util_copy_constant_buffer(&s->cb[index], cb, take_ownership); + + unsigned mask = (1 << index); + + if (cb) + s->cb_mask |= mask; + else + s->cb_mask &= ~mask; +} + +static void +agx_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *surface) +{ + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + +static void +agx_bind_state(struct pipe_context *ctx, void *state) +{ +} + +static void +agx_delete_state(struct pipe_context *ctx, void *state) +{ + FREE(state); +} + +/* BOs added to the batch in the uniform upload path */ + +static void +agx_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, + const struct pipe_vertex_buffer *buffers) +{ + struct agx_context *ctx = agx_context(pctx); + + util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, + start_slot, count, unbind_num_trailing_slots, take_ownership); + + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static void * +agx_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *state) +{ + assert(count < AGX_MAX_ATTRIBS); + + struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS); + for (unsigned i = 0; i < count; ++i) { + const struct pipe_vertex_element ve = state[i]; + assert(ve.instance_divisor == 0 && "no instancing"); + + const struct util_format_description *desc = + util_format_description(ve.src_format); + + assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); + assert((ve.src_offset & 0x3) == 0); + + attribs[i] = (struct agx_attribute) { + .buf = ve.vertex_buffer_index, + .src_offset = ve.src_offset / 4, + .nr_comps_minus_1 = desc->nr_channels - 1, + .format = agx_vertex_format[ve.src_format], + }; + } + + return attribs; +} + +static void +agx_bind_vertex_elements_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->attributes = cso; + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static void * +agx_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *cso) +{ + struct agx_uncompiled_shader *so = CALLOC_STRUCT(agx_uncompiled_shader); + + if (!so) + return NULL; + + /* TGSI unsupported */ + assert(cso->type == PIPE_SHADER_IR_NIR); + so->nir = cso->ir.nir; + + so->variants = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + return so; +} + +static bool +agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, + enum pipe_shader_type stage, struct agx_shader_key *key) +{ + struct agx_uncompiled_shader *so = ctx->stage[stage].shader; + assert(so != NULL); + + struct hash_entry *he = _mesa_hash_table_search(so->variants, &key); + + if (he) { + if ((*out) == he->data) + return false; + + *out = he->data; + return true; + } + + struct agx_compiled_shader *compiled = CALLOC_STRUCT(agx_compiled_shader); + struct util_dynarray binary; + util_dynarray_init(&binary, NULL); + + nir_shader *nir = nir_shader_clone(NULL, so->nir); + agx_compile_shader_nir(nir, key, &binary, &compiled->info); + + /* TODO: emit this properly */ + nir_variable_mode varying_mode = (nir->info.stage == MESA_SHADER_FRAGMENT) ? 
+ nir_var_shader_in : nir_var_shader_out; + + unsigned varying_count = 0; + + nir_foreach_variable_with_modes(var, nir, varying_mode) { + unsigned loc = var->data.driver_location; + unsigned sz = glsl_count_attribute_slots(var->type, FALSE); + + varying_count = MAX2(varying_count, loc + sz); + } + + compiled->varying_count = varying_count; + + unsigned varying_desc_len = AGX_VARYING_HEADER_LENGTH + varying_count * AGX_VARYING_LENGTH; + uint8_t *varying_desc = calloc(1, varying_desc_len); + + agx_pack(varying_desc, VARYING_HEADER, cfg) { + cfg.slots_1 = 1 + (4 * varying_count); + cfg.slots_2 = 1 + (4 * varying_count); + } + + for (unsigned i = 0; i < varying_count; ++i) { + agx_pack(varying_desc + AGX_VARYING_HEADER_LENGTH + (i * AGX_VARYING_LENGTH), VARYING, cfg) { + cfg.slot_1 = 1 + (4 * i); + cfg.slot_2 = 1 + (4 * i); + } + } + + if (binary.size) { + struct agx_device *dev = agx_device(ctx->base.screen); + compiled->bo = agx_bo_create(dev, + ALIGN_POT(binary.size, 256) + ((3 * (AGX_VARYING_HEADER_LENGTH + varying_count * AGX_VARYING_LENGTH)) + 20), + AGX_MEMORY_TYPE_SHADER); + memcpy(compiled->bo->ptr.cpu, binary.data, binary.size); + + + /* TODO: Why is the varying descriptor duplicated 3x? */ + unsigned offs = ALIGN_POT(binary.size, 256); + unsigned unk_offs = offs + 0x40; + for (unsigned copy = 0; copy < 3; ++copy) { + memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, varying_desc, varying_desc_len); + offs += varying_desc_len; + } + + uint16_t *map = (uint16_t *) (((uint8_t *) compiled->bo->ptr.cpu) + unk_offs); + *map = 0x140; // 0x0100 with one varying + + + + compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256); + } + + ralloc_free(nir); + util_dynarray_fini(&binary); + + he = _mesa_hash_table_insert(so->variants, &key, compiled); + *out = he->data; + return true; +} + +static bool +agx_update_vs(struct agx_context *ctx) +{ + struct agx_vs_shader_key key = { + .num_vbufs = util_last_bit(ctx->vb_mask) + }; + + memcpy(key.attributes, ctx->attributes, + sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS); + + for (unsigned i = 0; i < key.num_vbufs; ++i) { + assert((ctx->vertex_buffers[i].stride & 0x3) == 0); + key.vbuf_strides[i] = ctx->vertex_buffers[i].stride / 4; // TODO: alignment + } + + return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, + (struct agx_shader_key *) &key); +} + +static bool +agx_update_fs(struct agx_context *ctx) +{ + struct agx_fs_shader_key key = { + .tib_formats = { AGX_FORMAT_U8NORM } + }; + + return agx_update_shader(ctx, &ctx->fs, PIPE_SHADER_FRAGMENT, + (struct agx_shader_key *) &key); +} + +static void +agx_bind_shader_state(struct pipe_context *pctx, void *cso) +{ + if (!cso) + return; + + struct agx_context *ctx = agx_context(pctx); + struct agx_uncompiled_shader *so = cso; + + enum pipe_shader_type type = pipe_shader_type_from_mesa(so->nir->info.stage); + ctx->stage[type].shader = so; +} + +static void +agx_delete_compiled_shader(struct hash_entry *ent) +{ + struct agx_compiled_shader *so = ent->data; + agx_bo_unreference(so->bo); + FREE(so); +} + +static void +agx_delete_shader_state(struct pipe_context *ctx, + void *cso) +{ + struct agx_uncompiled_shader *so = cso; + _mesa_hash_table_destroy(so->variants, agx_delete_compiled_shader); + free(so); +} + +/* Pipeline consists of a sequence of binding commands followed by a set shader command */ +static uint32_t +agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +{ + /* Pipelines must be 64-byte aligned */ + struct agx_ptr ptr = 
agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (16 * AGX_BIND_UNIFORM_LENGTH) + // XXX: correct sizes, break up at compile time + (ctx->stage[stage].texture_count * AGX_BIND_TEXTURE_LENGTH) + + (PIPE_MAX_SAMPLERS * AGX_BIND_SAMPLER_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + /* There is a maximum number of half words we may push with a single + * BIND_UNIFORM record, so split up the range to fit. We only need to call + * agx_push_location once, however, which reduces the cost. */ + + for (unsigned i = 0; i < cs->info.push_ranges; ++i) { + struct agx_push push = cs->info.push[i]; + uint64_t buffer = agx_push_location(ctx, push, stage); + unsigned halfs_per_record = 14; + unsigned records = DIV_ROUND_UP(push.length, halfs_per_record); + + for (unsigned j = 0; j < records; ++j) { + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = push.base + (j * halfs_per_record); + cfg.size_halfs = MIN2(push.length - (j * halfs_per_record), halfs_per_record); + cfg.buffer = buffer + (j * halfs_per_record * 2); + } + + record += AGX_BIND_UNIFORM_LENGTH; + } + } + + for (unsigned i = 0; i < ctx->stage[stage].texture_count; ++i) { + struct agx_sampler_view *tex = ctx->stage[stage].textures[i]; + agx_batch_add_bo(ctx->batch, tex->desc); + agx_batch_add_bo(ctx->batch, agx_resource(tex->base.texture)->bo); + + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = tex->desc->ptr.gpu; + } + + record += AGX_BIND_TEXTURE_LENGTH; + } + + for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + struct agx_bo *bo = ctx->stage[stage].samplers[i]; + + if (!bo) + continue; + + agx_batch_add_bo(ctx->batch, bo); + + agx_pack(record, BIND_SAMPLER, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = bo->ptr.gpu; + } + + record += AGX_BIND_SAMPLER_LENGTH; + } + + /* TODO: Can we prepack this? */ + if (stage == PIPE_SHADER_FRAGMENT) { + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_3 = 0x8d; + cfg.unk_1 = 0x2010bd; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 1; + cfg.unk_3b = 0x1; + cfg.unk_4 = 0x800; + cfg.preshader_unk = 0xc080; + cfg.spill_size = 0x2; + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + } else { + agx_pack(record, SET_SHADER, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_2b = (cs->varying_count * 4); + cfg.unk_2 = 0x0d; + } + + record += AGX_SET_SHADER_LENGTH; + } + + /* End pipeline */ + memset(record, 0, 8); + assert(ptr.gpu < (1ull << 32)); + return ptr.gpu; +} + +/* Internal pipelines (TODO: refactor?) */ +uint64_t +agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = (6 * 2); + cfg.size_halfs = 4; + cfg.buffer = clear_buf; + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? 
*/ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_3 = 0x8d; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 4; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +uint64_t +agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, + uint64_t render_target) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_TEXTURE_LENGTH) + + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = render_target; + } + + record += AGX_BIND_TEXTURE_LENGTH; + + uint32_t unk[] = { 0, ~0 }; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = 4; + cfg.size_halfs = 4; + cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16); + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? */ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_2 = 0xd; + cfg.unk_3 = 0x8d; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +static uint64_t +demo_launch_fragment(struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count) +{ + uint32_t unk[] = { + 0x800000, + 0x1212 | (input_count << 16), // upper nibble is input count TODO: xmlify + pipeline, + varyings, + 0x0, + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_unk8(struct agx_compiled_shader *fs, struct agx_pool *pool) +{ + /* Varying related */ + uint32_t unk[] = { + /* interpolated count */ + 0x100c0000, fs->varying_count * 4, 0x0, 0x0, 0x0, + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64); + + agx_pack(t.cpu, LINKAGE, cfg) { + cfg.varying_count = 4 * vs->varying_count; + cfg.unk_1 = 0x10000; // varyings otherwise wrong + }; + + return t.gpu; +} + +static uint64_t +demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_RASTERIZER_LENGTH, 64); + + agx_pack(t.cpu, RASTERIZER, cfg) { + cfg.front.depth_function = ctx->zs.z_func; + cfg.back.depth_function = ctx->zs.z_func; + + cfg.front.disable_depth_write = ctx->zs.disable_z_write; + cfg.back.disable_depth_write = ctx->zs.disable_z_write; + }; + + return t.gpu; +} + +static uint64_t +demo_unk11(struct agx_pool *pool, bool prim_lines) +{ +#define UNK11_FILL_MODE_LINES_1 (1 << 26) + +#define UNK11_FILL_MODE_LINES_2 (0x5004 << 16) +#define UNK11_LINES (0x10000000) + + uint32_t unk[] = { + 0x200004a, + 0x200 | (prim_lines ? UNK11_FILL_MODE_LINES_1 : 0), + 0x7e00000 | (prim_lines ? UNK11_LINES : 0), + 0x7e00000 | (prim_lines ? 
UNK11_LINES : 0), + + 0x1ffff + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_unk12(struct agx_pool *pool) +{ + uint32_t unk[] = { + 0x410000, + 0x1e3ce508, + 0xa0 + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_unk14(struct agx_pool *pool) +{ + uint32_t unk[] = { + 0x100, 0x0, + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static void +agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr) +{ + assert(ptr < (1ull << 40)); + assert(size_words < (1ull << 24)); + + uint64_t value = (size_words | (ptr << 24)); + memcpy(*out, &value, sizeof(value)); + *out += sizeof(value); +} + +static uint8_t * +agx_encode_state(struct agx_context *ctx, uint8_t *out, + uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings, + bool is_lines) +{ + agx_pack(out, BIND_PIPELINE, cfg) { + cfg.pipeline = pipeline_vertex; + cfg.vs_output_count_1 = (ctx->vs->varying_count * 4); + cfg.vs_output_count_2 = (ctx->vs->varying_count * 4); + } + + /* yes, it's really 17 bytes */ + out += AGX_BIND_PIPELINE_LENGTH; + *(out++) = 0x0; + + struct agx_pool *pool = &ctx->batch->pool; + struct agx_ptr zero = agx_pool_alloc_aligned(pool, 16, 256); + memset(zero.cpu, 0, 16); + + agx_push_record(&out, 0, zero.gpu); + agx_push_record(&out, 5, demo_unk8(ctx->fs, pool)); + agx_push_record(&out, 5, demo_launch_fragment(pool, pipeline_fragment, varyings, ctx->fs->varying_count + 1)); + agx_push_record(&out, 4, demo_linkage(ctx->vs, pool)); + agx_push_record(&out, 7, demo_rasterizer(ctx, pool)); + agx_push_record(&out, 5, demo_unk11(pool, is_lines)); + agx_push_record(&out, 10, agx_pool_upload(pool, ctx->viewport, sizeof(ctx->viewport))); + agx_push_record(&out, 3, demo_unk12(pool)); + agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull))); + agx_push_record(&out, 2, demo_unk14(pool)); + + return (out - 1); // XXX: alignment fixup, or something +} + +static enum agx_primitive +agx_primitive_for_pipe(enum pipe_prim_type mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return AGX_PRIMITIVE_POINTS; + case PIPE_PRIM_LINES: return AGX_PRIMITIVE_LINES; + case PIPE_PRIM_LINE_STRIP: return AGX_PRIMITIVE_LINE_STRIP; + case PIPE_PRIM_LINE_LOOP: return AGX_PRIMITIVE_LINE_LOOP; + case PIPE_PRIM_TRIANGLES: return AGX_PRIMITIVE_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: return AGX_PRIMITIVE_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return AGX_PRIMITIVE_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return AGX_PRIMITIVE_QUADS; + case PIPE_PRIM_QUAD_STRIP: return AGX_PRIMITIVE_QUAD_STRIP; + default: unreachable("todo: other primitive types"); + } +} + +static uint64_t +agx_index_buffer_ptr(struct agx_batch *batch, + const struct pipe_draw_start_count_bias *draw, + const struct pipe_draw_info *info) +{ + off_t offset = draw->start * info->index_size; + + if (!info->has_user_indices) { + struct agx_bo *bo = agx_resource(info->index.resource)->bo; + agx_batch_add_bo(batch, bo); + + return bo->ptr.gpu + offset; + } else { + return agx_pool_upload_aligned(&batch->pool, + ((uint8_t *) info->index.user) + offset, + draw->count * info->index_size, 64); + } +} + +static void +agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + if (num_draws > 1) { + util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws); + return; + } + + if 
(info->index_size && draws->index_bias) + unreachable("todo: index bias"); + if (info->instance_count != 1) + unreachable("todo: instancing"); + + struct agx_context *ctx = agx_context(pctx); + struct agx_batch *batch = ctx->batch; + + /* TODO: masks */ + ctx->batch->draw |= ~0; + + /* TODO: Dirty track */ + agx_update_vs(ctx); + agx_update_fs(ctx); + + agx_batch_add_bo(batch, ctx->vs->bo); + agx_batch_add_bo(batch, ctx->fs->bo); + + bool is_lines = + (info->mode == PIPE_PRIM_LINES) || + (info->mode == PIPE_PRIM_LINE_STRIP) || + (info->mode == PIPE_PRIM_LINE_LOOP); + + uint8_t *out = agx_encode_state(ctx, batch->encoder_current, + agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX), + agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT), + ctx->fs->varyings, is_lines); + + enum agx_primitive prim = agx_primitive_for_pipe(info->mode); + unsigned idx_size = info->index_size; + + if (idx_size) { + uint64_t ib = agx_index_buffer_ptr(batch, draws, info); + + /* Index sizes are encoded logarithmically */ + STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8); + STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16); + STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32); + assert((idx_size == 1) || (idx_size == 2) || (idx_size == 4)); + + agx_pack(out, INDEXED_DRAW, cfg) { + cfg.restart_index = 0xFFFF;//info->restart_index; + cfg.unk_2a = (ib >> 32); + cfg.primitive = prim; + cfg.restart_enable = info->primitive_restart; + cfg.index_size = __builtin_ctz(idx_size); + cfg.index_buffer_offset = (ib & BITFIELD_MASK(32)); + cfg.index_buffer_size = ALIGN_POT(draws->count * idx_size, 4); + cfg.index_count = draws->count; + cfg.instance_count = info->instance_count; + cfg.base_vertex = draws->index_bias; + }; + + out += AGX_INDEXED_DRAW_LENGTH; + } else { + agx_pack(out, DRAW, cfg) { + cfg.primitive = prim; + cfg.vertex_start = draws->start; + cfg.vertex_count = draws->count; + cfg.instance_count = info->instance_count; + }; + + out += AGX_DRAW_LENGTH; + } + + batch->encoder_current = out; +} + +void agx_init_state_functions(struct pipe_context *ctx); + +void +agx_init_state_functions(struct pipe_context *ctx) +{ + ctx->create_blend_state = agx_create_blend_state; + ctx->create_depth_stencil_alpha_state = agx_create_zsa_state; + ctx->create_fs_state = agx_create_shader_state; + ctx->create_rasterizer_state = agx_create_rs_state; + ctx->create_sampler_state = agx_create_sampler_state; + ctx->create_sampler_view = agx_create_sampler_view; + ctx->create_surface = agx_create_surface; + ctx->create_vertex_elements_state = agx_create_vertex_elements; + ctx->create_vs_state = agx_create_shader_state; + ctx->bind_blend_state = agx_bind_state; + ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state; + ctx->bind_sampler_states = agx_bind_sampler_states; + ctx->bind_fs_state = agx_bind_shader_state; + ctx->bind_rasterizer_state = agx_bind_rasterizer_state; + ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state; + ctx->bind_vs_state = agx_bind_shader_state; + ctx->delete_blend_state = agx_delete_state; + ctx->delete_depth_stencil_alpha_state = agx_delete_state; + ctx->delete_fs_state = agx_delete_shader_state; + ctx->delete_rasterizer_state = agx_delete_state; + ctx->delete_sampler_state = agx_delete_sampler_state; + ctx->delete_vertex_elements_state = agx_delete_state; + ctx->delete_vs_state = agx_delete_state; + ctx->set_blend_color = agx_set_blend_color; + ctx->set_clip_state = agx_set_clip_state; + ctx->set_constant_buffer = agx_set_constant_buffer; + ctx->set_sampler_views = 
agx_set_sampler_views; + ctx->set_framebuffer_state = agx_set_framebuffer_state; + ctx->set_polygon_stipple = agx_set_polygon_stipple; + ctx->set_sample_mask = agx_set_sample_mask; + ctx->set_scissor_states = agx_set_scissor_states; + ctx->set_stencil_ref = agx_set_stencil_ref; + ctx->set_vertex_buffers = agx_set_vertex_buffers; + ctx->set_viewport_states = agx_set_viewport_states; + ctx->sampler_view_destroy = agx_sampler_view_destroy; + ctx->surface_destroy = agx_surface_destroy; + ctx->draw_vbo = agx_draw_vbo; +} diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h new file mode 100644 index 00000000000..9d860f41c21 --- /dev/null +++ b/src/gallium/drivers/asahi/agx_state.h @@ -0,0 +1,229 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AGX_STATE_H +#define AGX_STATE_H + +#include "gallium/include/pipe/p_context.h" +#include "gallium/include/pipe/p_state.h" +#include "gallium/include/pipe/p_screen.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_bo.h" +#include "asahi/lib/agx_device.h" +#include "asahi/lib/pool.h" +#include "asahi/compiler/agx_compile.h" +#include "util/hash_table.h" +#include "util/bitset.h" + +struct agx_compiled_shader { + /* Mapped executable memory */ + struct agx_bo *bo; + + /* Varying descriptor (TODO: is this the right place?) */ + uint64_t varyings; + + /* # of varyings (currently vec4, should probably be changed) */ + unsigned varying_count; + + /* Metadata returned from the compiler */ + struct agx_shader_info info; +}; + +struct agx_uncompiled_shader { + struct nir_shader *nir; + struct hash_table *variants; + + /* Set on VS, passed to FS for linkage */ + unsigned base_varying; +}; + +struct agx_stage { + struct agx_uncompiled_shader *shader; + uint32_t dirty; + + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t cb_mask; + + /* BOs for bound samplers. 
This is all the information we need at + * draw time to assemble the pipeline */ + struct agx_bo *samplers[PIPE_MAX_SAMPLERS]; + + /* Sampler views need the full CSO due to Gallium state management */ + struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + unsigned texture_count; +}; + +struct agx_batch { + unsigned width, height, nr_cbufs; + struct pipe_surface *cbufs[8]; + struct pipe_surface *zsbuf; + + /* PIPE_CLEAR_* bitmask */ + uint32_t clear, draw; + + float clear_color[4]; + + /* Resource list requirements, represented as a bit set indexed by BO + * handles (GEM handles on Linux, or IOGPU's equivalent on macOS) */ + BITSET_WORD bo_list[256]; + + struct agx_pool pool, pipeline_pool; + struct agx_bo *encoder; + uint8_t *encoder_current; +}; + +struct agx_zsa { + enum agx_zs_func z_func; + bool disable_z_write; +}; + +#define AGX_DIRTY_VERTEX (1 << 0) + +struct agx_context { + struct pipe_context base; + struct agx_compiled_shader *vs, *fs; + uint32_t dirty; + + struct agx_batch *batch; + + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + uint32_t vb_mask; + + struct agx_stage stage[PIPE_SHADER_TYPES]; + struct agx_attribute *attributes; + struct agx_rasterizer *rast; + struct agx_zsa zs; + + uint8_t viewport[AGX_VIEWPORT_LENGTH]; + uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH]; +}; + +static inline struct agx_context * +agx_context(struct pipe_context *pctx) +{ + return (struct agx_context *) pctx; +} + +struct agx_rasterizer { + struct pipe_rasterizer_state base; + uint8_t cull[AGX_CULL_LENGTH]; +}; + +struct agx_query { + unsigned query; +}; + +struct agx_sampler_view { + struct pipe_sampler_view base; + + /* Prepared descriptor */ + struct agx_bo *desc; +}; + +struct agx_screen { + struct pipe_screen pscreen; + struct agx_device dev; + struct sw_winsys *winsys; +}; + +static inline struct agx_screen * +agx_screen(struct pipe_screen *p) +{ + return (struct agx_screen *)p; +} + +static inline struct agx_device * +agx_device(struct pipe_screen *p) +{ + return &(agx_screen(p)->dev); +} + +/* TODO: UABI, fake for macOS */ +#ifndef DRM_FORMAT_MOD_LINEAR +#define DRM_FORMAT_MOD_LINEAR 1 +#endif +#define DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER (2) + +struct agx_resource { + struct pipe_resource base; + uint64_t modifier; + + /* Hardware backing */ + struct agx_bo *bo; + + /* Software backing (XXX) */ + struct sw_displaytarget *dt; + unsigned dt_stride; + + struct { + bool data_valid; + unsigned offset; + unsigned line_stride; + } slices[PIPE_MAX_TEXTURE_LEVELS]; +}; + +static inline struct agx_resource * +agx_resource(struct pipe_resource *pctx) +{ + return (struct agx_resource *) pctx; +} + +struct agx_transfer { + struct pipe_transfer base; + void *map; + struct { + struct pipe_resource *rsrc; + struct pipe_box box; + } staging; +}; + +static inline struct agx_transfer * +agx_transfer(struct pipe_transfer *p) +{ + return (struct agx_transfer *)p; +} + +uint64_t +agx_push_location(struct agx_context *ctx, struct agx_push push, + enum pipe_shader_type stage); + +uint64_t +agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf); + +uint64_t +agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, + uint64_t render_target); + +/* Add a BO to a batch. This needs to be amortized O(1) since it's called in + * hot paths. 
To achieve this we model BO lists by bit sets */ + +static inline void +agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo) +{ + if (unlikely(bo->handle > (sizeof(batch->bo_list) * 8))) + unreachable("todo: growable"); + + BITSET_SET(batch->bo_list, bo->handle); +} + +#endif diff --git a/src/gallium/drivers/asahi/meson.build b/src/gallium/drivers/asahi/meson.build index efd2c2615cf..5766dd8f3f5 100644 --- a/src/gallium/drivers/asahi/meson.build +++ b/src/gallium/drivers/asahi/meson.build @@ -19,6 +19,22 @@ # SOFTWARE. files_asahi = files( + 'agx_pipe.c', + 'agx_state.c', 'agx_uniforms.c', 'magic.c', ) + +libasahi = static_library( + 'asahi', + files_asahi, + include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src], + c_args : [c_msvc_compat_args], + gnu_symbol_visibility : 'hidden', + dependencies : idep_nir, +) + +driver_asahi = declare_dependency( + compile_args : '-DGALLIUM_ASAHI', + link_with : [libasahi, libasahi_compiler, libasahi_lib, libasahi_decode] +) diff --git a/src/gallium/meson.build b/src/gallium/meson.build index 733ab71e9a8..0b2e4c5e01e 100644 --- a/src/gallium/meson.build +++ b/src/gallium/meson.build @@ -58,6 +58,11 @@ if with_gallium_softpipe else driver_swrast = declare_dependency() endif +if with_gallium_asahi + subdir('drivers/asahi') +else + driver_asahi = declare_dependency() +endif if with_gallium_r300 or with_gallium_radeonsi or with_gallium_r600 subdir('winsys/radeon/drm') endif diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index d12221e7598..90b48bf508e 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -57,7 +57,8 @@ libgallium_dri = shared_library( driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, driver_tegra, driver_i915, driver_svga, driver_virgl, - driver_swr, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12 + driver_swr, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12, + driver_asahi ], # Will be deleted during installation, see install_megadrivers.py install : true, @@ -105,7 +106,8 @@ foreach d : [[with_gallium_kmsro, [ [with_gallium_virgl, 'virtio_gpu_dri.so'], [with_gallium_lima, 'lima_dri.so'], [with_gallium_zink, 'zink_dri.so'], - [with_gallium_d3d12, 'd3d12_dri.so']] + [with_gallium_d3d12, 'd3d12_dri.so'], + [with_gallium_asahi, 'asahi_dri.so']] if d[0] gallium_dri_drivers += d[1] endif diff --git a/src/gallium/targets/libgl-xlib/meson.build b/src/gallium/targets/libgl-xlib/meson.build index 3c1e1258ecc..7f161f7791a 100644 --- a/src/gallium/targets/libgl-xlib/meson.build +++ b/src/gallium/targets/libgl-xlib/meson.build @@ -52,8 +52,8 @@ libgl = shared_library( libxlib, libws_xlib, libglapi_static, libgallium, libmesa_gallium, gallium_xlib_link_with, ], - dependencies : [dep_x11, dep_thread, dep_clock, dep_unwind, driver_swrast, driver_swr, driver_virgl], + dependencies : [dep_x11, dep_thread, dep_clock, dep_unwind, driver_swrast, driver_swr, driver_virgl, driver_asahi], install : true, version : '1.5.0', - darwin_versions : '4.0.0', + darwin_versions: '4.0.0', ) diff --git a/src/meson.build b/src/meson.build index dc03cb1e2f9..6725bbac931 100644 --- a/src/meson.build +++ b/src/meson.build @@ -98,7 +98,7 @@ endif if with_gallium_nouveau subdir('nouveau') endif -if with_tools.contains('asahi') +if with_gallium_asahi or with_tools.contains('asahi') subdir('asahi') endif subdir('mesa')
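
A few of the encoding details above may be easier to follow with standalone sketches. The demo_* names below are illustrative stand-ins, not driver symbols, and the sketches use plain C rather than the driver's agx_pack/BITSET helpers.

The BIND_UNIFORM splitting in agx_build_pipeline can be shown in isolation: a push range of `length` half-words starting at uniform register `base` is broken into records of at most 14 half-words each. A minimal sketch, assuming a simplified demo_bind_uniform struct in place of the packed BIND_UNIFORM descriptor; only the splitting arithmetic mirrors the driver:

#include <stdint.h>
#include <stdio.h>

#define DEMO_HALFS_PER_RECORD 14
#define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y))
#define MIN2(x, y) ((x) < (y) ? (x) : (y))

/* Hypothetical stand-in for the packed BIND_UNIFORM record */
struct demo_bind_uniform {
   uint16_t start_halfs;  /* first uniform register, in 16-bit units */
   uint16_t size_halfs;   /* number of 16-bit words covered by this record */
   uint64_t buffer;       /* GPU address of the source data */
};

static unsigned
demo_split_push(uint16_t base, uint16_t length, uint64_t buffer,
                struct demo_bind_uniform *out)
{
   unsigned records = DIV_ROUND_UP(length, DEMO_HALFS_PER_RECORD);

   for (unsigned j = 0; j < records; ++j) {
      unsigned offset = j * DEMO_HALFS_PER_RECORD;

      out[j].start_halfs = base + offset;
      out[j].size_halfs = MIN2(length - offset, DEMO_HALFS_PER_RECORD);
      out[j].buffer = buffer + (offset * 2); /* half-words are 2 bytes */
   }

   return records;
}

int
main(void)
{
   struct demo_bind_uniform recs[8];
   unsigned n = demo_split_push(4, 30, 0x10000, recs);

   /* 30 half-words split as 14 + 14 + 2 */
   for (unsigned i = 0; i < n; ++i) {
      printf("record %u: start %u, size %u, buffer 0x%llx\n", i,
             (unsigned) recs[i].start_halfs, (unsigned) recs[i].size_halfs,
             (unsigned long long) recs[i].buffer);
   }

   return 0;
}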
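
agx_push_record packs each record reference into a single 64-bit word: the size in 32-bit words sits in the low 24 bits and the (at most 40-bit) GPU address is shifted above it. A sketch of that layout follows; the unpack helper is added purely for illustration and does not exist in the driver, and the hardware-side meaning of the fields is only what the reverse engineering so far suggests:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
demo_pack_record(unsigned size_words, uint64_t gpu_ptr)
{
   /* Same bounds the driver asserts: 40-bit address, 24-bit size */
   assert(gpu_ptr < (1ull << 40));
   assert(size_words < (1u << 24));

   return (uint64_t) size_words | (gpu_ptr << 24);
}

static void
demo_unpack_record(uint64_t value, unsigned *size_words, uint64_t *gpu_ptr)
{
   *size_words = value & ((1u << 24) - 1);
   *gpu_ptr = value >> 24;
}

int
main(void)
{
   uint64_t packed = demo_pack_record(7, 0x123456789ull);
   unsigned size;
   uint64_t ptr;

   demo_unpack_record(packed, &size, &ptr);
   printf("size %u words, pointer 0x%llx\n", size, (unsigned long long) ptr);
   return 0;
}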
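
The batch's resource list is a bit set indexed by BO handle, which is what makes agx_batch_add_bo amortized O(1) and duplicate-insert safe. A standalone sketch using a plain uint32_t array in place of Mesa's BITSET_* macros; it uses the strict bound (handle < bits), whereas the patch's guard uses > and so admits one out-of-range handle equal to sizeof(bo_list) * 8:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_BO_WORDS 256
#define DEMO_BO_BITS  (DEMO_BO_WORDS * 32)

/* Hypothetical stand-in for struct agx_batch's bo_list */
struct demo_batch {
   uint32_t bo_list[DEMO_BO_WORDS];
};

static void
demo_batch_add_bo(struct demo_batch *batch, uint32_t handle)
{
   /* The driver leaves growing the set as a TODO and bails instead */
   assert(handle < DEMO_BO_BITS);

   batch->bo_list[handle / 32] |= 1u << (handle % 32);
}

static bool
demo_batch_has_bo(const struct demo_batch *batch, uint32_t handle)
{
   assert(handle < DEMO_BO_BITS);

   return (batch->bo_list[handle / 32] >> (handle % 32)) & 1u;
}

int
main(void)
{
   struct demo_batch batch = { { 0 } };

   demo_batch_add_bo(&batch, 5);
   demo_batch_add_bo(&batch, 5);   /* duplicates just re-set the same bit */
   demo_batch_add_bo(&batch, 700);

   printf("has 5: %d, has 6: %d, has 700: %d\n",
          demo_batch_has_bo(&batch, 5),
          demo_batch_has_bo(&batch, 6),
          demo_batch_has_bo(&batch, 700));
   return 0;
}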