/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"
#include "panfrost-quirks.h"

#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_encoder.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}
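/* On Midgard (v5), the tiler consumes a polygon list buffer rather than the
 * Bifrost tiler heap. The list is allocated lazily, once per batch, and
 * sized for the current framebuffer dimensions.
 */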
#if PAN_ARCH == 5
void
panvk_per_arch(cmd_get_polygon_list)(struct panvk_cmd_buffer *cmdbuf,
                                     unsigned width, unsigned height,
                                     bool has_draws)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.ctx.midgard.polygon_list)
      return;

   unsigned size =
      panfrost_tiler_get_polygon_list_size(pdev, width, height, has_draws);
   size = util_next_power_of_two(size);

   /* Create the BO as invisible if we can. In the non-hierarchical tiler
    * case, we need to write the polygon list manually because there's no
    * WRITE_VALUE job in the chain.
    */
   bool init_polygon_list =
      !has_draws && (pdev->quirks & MIDGARD_NO_HIER_TILING);
   batch->tiler.ctx.midgard.polygon_list =
      panfrost_bo_create(pdev, size,
                         init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                         "Polygon list");

   if (init_polygon_list) {
      assert(batch->tiler.ctx.midgard.polygon_list->ptr.cpu);
      uint32_t *polygon_list_body =
         batch->tiler.ctx.midgard.polygon_list->ptr.cpu +
         MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;

      polygon_list_body[0] = 0xa0000000;
   }

   batch->tiler.ctx.midgard.disable = !has_draws;
}
#endif

#if PAN_ARCH <= 5
static void
panvk_copy_fb_desc(struct panvk_cmd_buffer *cmdbuf, void *src)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   uint32_t size = pan_size(FRAMEBUFFER);

   if (fbinfo->zs.view.zs || fbinfo->zs.view.s)
      size += pan_size(ZS_CRC_EXTENSION);

   size += MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET);

   memcpy(batch->fb.desc.cpu, src, size);
}
#endif
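/* Closing a batch finalizes everything the kernel submission needs: preload
 * jobs to read back existing framebuffer content, TLS/WLS allocation, the
 * framebuffer descriptor and, if anything was drawn, the fragment job.
 * Batches with no job and no event operation are simply dropped.
 */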
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
#if PAN_ARCH <= 5
   uint32_t tmp_fbd[(pan_size(FRAMEBUFFER) +
                     pan_size(ZS_CRC_EXTENSION) +
                     (MAX_RTS * pan_size(RENDER_TARGET))) / 4];
#endif

   assert(batch);

   bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->scoreboard.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->pool->alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr =
            pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER);

         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                          MALI_JOB_TYPE_NULL, false, false, 0, 0, &ptr,
                          false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->scoreboard.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs =
         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
                              &cmdbuf->state.fb.info,
                              PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu,
                              PAN_ARCH >= 6 ? batch->tiler.descs.gpu : 0,
                              preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base,
                                batch->tlsinfo.tls.size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base,
                                batch->wls_total_size, 4096).gpu;
   }

   if ((PAN_ARCH >= 6 || !batch->fb.desc.cpu) && batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
#if PAN_ARCH == 5
      panvk_per_arch(cmd_get_polygon_list)(cmdbuf, fbinfo->width,
                                           fbinfo->height, false);

      mali_ptr polygon_list =
         batch->tiler.ctx.midgard.polygon_list->ptr.gpu;
      struct panfrost_ptr writeval_job =
         panfrost_scoreboard_initialize_tiler(&cmdbuf->desc_pool.base,
                                              &batch->scoreboard,
                                              polygon_list);
      if (writeval_job.cpu)
         util_dynarray_append(&batch->jobs, void *, writeval_job.cpu);
#endif

#if PAN_ARCH <= 5
      void *fbd = tmp_fbd;
#else
      void *fbd = batch->fb.desc.cpu;
#endif

      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, fbd);

#if PAN_ARCH <= 5
      panvk_copy_fb_desc(cmdbuf, tmp_fbd);
      memcpy(batch->tiler.templ,
             pan_section_ptr(fbd, FRAMEBUFFER, TILER),
             pan_size(TILER_CONTEXT));
#endif

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = contents
   };
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}
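/* The framebuffer descriptor is allocated as one aggregate: the FBD itself,
 * an optional ZS/CRC extension, and one RENDER_TARGET descriptor per color
 * attachment (at least one, even without color attachments). The low bits of
 * the GPU address carry the descriptor tag the hardware expects.
 */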
void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   unsigned tags = MALI_FBD_TAG_IS_MFBD;

   batch->fb.info = cmdbuf->state.framebuffer;
   batch->fb.desc =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(FRAMEBUFFER),
                                    PAN_DESC_ARRAY(has_zs_ext ? 1 : 0,
                                                   ZS_CRC_EXTENSION),
                                    PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1),
                                                   RENDER_TARGET));

   /* Tag the pointer */
   batch->fb.desc.gpu |= tags;

#if PAN_ARCH >= 6
   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
#endif
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (batch->tls.gpu)
      return;

   if (PAN_ARCH == 5 && gfx) {
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      batch->tls = batch->fb.desc;
      batch->tls.gpu &= ~63ULL;
   } else {
      batch->tls =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

static void
panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf, unsigned id,
                        union panvk_sysval_data *data)
{
   switch (PAN_SYSVAL_TYPE(id)) {
   case PAN_SYSVAL_VIEWPORT_SCALE:
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, data);
      break;
   case PAN_SYSVAL_VIEWPORT_OFFSET:
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data);
      break;
   case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
      /* TODO: support base_{vertex,instance} */
      data->u32[0] = data->u32[1] = data->u32[2] = 0;
      break;
   case PAN_SYSVAL_BLEND_CONSTANTS:
      memcpy(data->f32, cmdbuf->state.blend.constants, sizeof(data->f32));
      break;
   default:
      unreachable("Invalid static sysval");
   }
}

static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_sysvals)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) {
      unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count;

      if (!sysval_count || pipeline->sysvals[i].ubo ||
          (desc_state->sysvals[i] &&
           !(cmdbuf->state.dirty & pipeline->sysvals[i].dirty_mask)))
         continue;

      struct panfrost_ptr sysvals =
         pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sysval_count * 16,
                                16);
      union panvk_sysval_data *data = sysvals.cpu;

      for (unsigned s = 0; s < sysval_count; s++) {
         panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s],
                                 &data[s]);
      }

      desc_state->sysvals[i] = sysvals.gpu;
   }
}

static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);

   struct panfrost_ptr ubos =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                pipeline->num_ubos, UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}
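/* Texture descriptors from all bound sets are copied into one contiguous
 * table. On Bifrost and later (v6+), each entry is a full TEXTURE
 * descriptor; Midgard only stores a pointer per texture.
 */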
static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   unsigned tex_entry_size = PAN_ARCH >= 6 ?
                             pan_size(TEXTURE) : sizeof(mali_ptr);
   struct panfrost_ptr textures =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             num_textures * tex_entry_size,
                             tex_entry_size);

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i].set)
         continue;

      memcpy(texture,
             desc_state->sets[i].set->textures,
             desc_state->sets[i].set->layout->num_textures *
             tex_entry_size);

      texture += desc_state->sets[i].set->layout->num_textures *
                 tex_entry_size;
   }

   desc_state->textures = textures.gpu;
}

static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                num_samplers, SAMPLER);
   struct mali_sampler_packed *sampler = samplers.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i].set)
         continue;

      memcpy(sampler,
             desc_state->sets[i].set->samplers,
             desc_state->sets[i].set->layout->num_samplers *
             pan_size(SAMPLER));

      sampler += desc_state->sets[i].set->layout->num_samplers;
   }

   desc_state->samplers = samplers.gpu;
}

static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd =
         pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                       PAN_DESC(RENDERER_STATE),
                                       PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
                                                      BLEND));
      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >=
                    sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != ~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >=
                          sizeof(*bd_templ));

            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}
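/* On Bifrost (v6+), the TILER_CONTEXT and TILER_HEAP descriptors replace the
 * Midgard polygon list. They are emitted once per batch into a CPU-side
 * template (batch->tiler.templ), which is then copied to the GPU-visible
 * allocation.
 */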
#if PAN_ARCH >= 6
void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(TILER_CONTEXT),
                                    PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}
#endif

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

#if PAN_ARCH == 5
   panvk_per_arch(cmd_get_polygon_list)(cmdbuf, fbinfo->width,
                                        fbinfo->height, true);
#else
   panvk_per_arch(cmd_get_tiler_context)(cmdbuf, fbinfo->width,
                                         fbinfo->height);
#endif
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->vertex_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                buf_count, ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position =
         varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
         varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (BITSET_TEST(varyings->active, VARYING_SLOT_PSIZ)) {
      draw->psiz =
         varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
         varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
         cmdbuf->state.rast.line_width : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }

   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count)
         continue;

      struct panfrost_ptr attribs =
         pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                   varyings->stage[s].count, ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s,
                                    attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}
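/* Attribute and attribute-buffer descriptors are emitted lazily and cached
 * in cmdbuf->state.vb until the vertex-buffer bindings change. Each buffer
 * takes two ATTRIBUTE_BUFFER slots (the second one is presumably reserved
 * for the continuation word used by instanced divisors), hence the
 * buf_count * 2 allocation.
 */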
static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* TODO: images */
   if (!pipeline->attribs.buf_count)
      return;

   if (cmdbuf->state.vb.attribs) {
      draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs;
      draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs;
      return;
   }

   unsigned buf_count = pipeline->attribs.buf_count +
                        (PAN_ARCH >= 6 ? 1 : 0);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, buf_count * 2,
                                ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
                                    cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count,
                                    draw, bufs.cpu);
   cmdbuf->state.vb.attrib_bufs = bufs.gpu;

   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                pipeline->attribs.attrib_count,
                                ATTRIBUTE);

   panvk_per_arch(emit_attribs)(cmdbuf->device, &pipeline->attribs,
                                cmdbuf->state.vb.bufs,
                                cmdbuf->state.vb.count,
                                attribs.cpu);
   cmdbuf->state.vb.attribs = attribs.gpu;

   draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs;
   draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs;
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
         &cmdbuf->state.viewport : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ?
         &cmdbuf->state.scissor : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}
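/* Each draw emits a vertex job (a COMPUTE_JOB) and, when a fragment shader
 * is required, a tiler job that is scoreboarded to depend on it.
 */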
static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
                        uint32_t vertexCount,
                        uint32_t instanceCount,
                        uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* There are only 16 bits in the descriptor for the job ID. Make sure all
    * three jobs in this draw (two on Bifrost) land in the same batch.
    */
   if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->fs.required)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = panfrost_padded_vertex_count(vertexCount),
      .offset_start = firstVertex,
      .tls = batch->tls.gpu,
      .fb = batch->fb.desc.gpu,
      .ubos = desc_state->ubos,
      .textures = desc_state->textures,
      .samplers = desc_state->samplers,
   };

   STATIC_ASSERT(sizeof(draw.invocation) >=
                 sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw.invocation,
                                     1, vertexCount, instanceCount, 1, 1, 1,
                                     true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, &draw);
   panvk_draw_prepare_varyings(cmdbuf, &draw);
   panvk_draw_prepare_attributes(cmdbuf, &draw);
   panvk_draw_prepare_viewport(cmdbuf, &draw);
   panvk_draw_prepare_tiler_context(cmdbuf, &draw);
   panvk_draw_prepare_vertex_job(cmdbuf, &draw);
   panvk_draw_prepare_tiler_job(cmdbuf, &draw);

   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size,
                                  batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
                       &draw.jobs.vertex, false);

   if (pipeline->fs.required) {
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
                       &draw.jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   cmdbuf->state.dirty = 0;
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_EXECUTABLE;

   return cmdbuf->record_result;
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->pool->alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfoKHR einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}
void
panvk_per_arch(CmdPipelineBarrier)(VkCommandBuffer commandBuffer,
                                   VkPipelineStageFlags srcStageMask,
                                   VkPipelineStageFlags destStageMask,
                                   VkDependencyFlags dependencyFlags,
                                   uint32_t memoryBarrierCount,
                                   const VkMemoryBarrier *pMemoryBarriers,
                                   uint32_t bufferMemoryBarrierCount,
                                   const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                                   uint32_t imageMemoryBarrierCount,
                                   const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, so there is
    * nothing to do for memory barriers as long as we implement barriers by
    * starting a new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this
       * event.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->scoreboard.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   }
}

void
panvk_per_arch(CmdSetEvent)(VkCommandBuffer commandBuffer,
                            VkEvent _event,
                            VkPipelineStageFlags stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent)(VkCommandBuffer commandBuffer,
                              VkEvent _event,
                              VkPipelineStageFlags stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents)(VkCommandBuffer commandBuffer,
                              uint32_t eventCount,
                              const VkEvent *pEvents,
                              VkPipelineStageFlags srcStageMask,
                              VkPipelineStageFlags dstStageMask,
                              uint32_t memoryBarrierCount,
                              const VkMemoryBarrier *pMemoryBarriers,
                              uint32_t bufferMemoryBarrierCount,
                              const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                              uint32_t imageMemoryBarrierCount,
                              const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}
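/* Resetting releases every recorded batch (dropping the polygon-list BO
 * reference on Midgard) and rewinds the upload pools so their memory can be
 * reused by the next recording.
 */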
static VkResult
panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   vk_command_buffer_reset(&cmdbuf->vk);

   cmdbuf->record_result = VK_SUCCESS;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches,
                            node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
#if PAN_ARCH <= 5
      panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list);
#endif
      util_dynarray_fini(&batch->event_ops);
      vk_free(&cmdbuf->pool->alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0,
             sizeof(cmdbuf->bind_points[0].desc_state.sets));

   return cmdbuf->record_result;
}

static void
panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_device *device = cmdbuf->device;

   list_del(&cmdbuf->pool_link);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches,
                            node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
#if PAN_ARCH <= 5
      panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list);
#endif
      util_dynarray_fini(&batch->event_ops);
      vk_free(&cmdbuf->pool->alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}
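/* A command buffer owns three upload pools: descriptors (CPU-visible), TLS
 * and varyings (both GPU-only, hence PAN_BO_INVISIBLE). When the command
 * buffer is allocated from a pool, the backing BOs are recycled through the
 * pool's per-purpose BO pools.
 */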
static VkResult
panvk_create_cmdbuf(struct panvk_device *device,
                    struct panvk_cmd_pool *pool,
                    VkCommandBufferLevel level,
                    struct panvk_cmd_buffer **cmdbuf_out)
{
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&cmdbuf->vk, &device->vk);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;
   cmdbuf->level = level;
   cmdbuf->pool = pool;

   if (pool) {
      list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
      cmdbuf->queue_family_index = pool->queue_family_index;
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer
       */
      list_inithead(&cmdbuf->pool_link);
      cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL;
   }

   panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev,
                   pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024,
                   "Command buffer descriptor pool", true);
   panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev,
                   pool ? &pool->tls_bo_pool : NULL,
                   PAN_BO_INVISIBLE, 64 * 1024, "TLS pool", false);
   panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev,
                   pool ? &pool->varying_bo_pool : NULL,
                   PAN_BO_INVISIBLE, 64 * 1024, "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
   *cmdbuf_out = cmdbuf;

   return VK_SUCCESS;
}

VkResult
panvk_per_arch(AllocateCommandBuffers)(VkDevice _device,
                                       const VkCommandBufferAllocateInfo *pAllocateInfo,
                                       VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   unsigned i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      struct panvk_cmd_buffer *cmdbuf = NULL;

      if (!list_is_empty(&pool->free_cmd_buffers)) {
         cmdbuf = list_first_entry(&pool->free_cmd_buffers,
                                   struct panvk_cmd_buffer, pool_link);

         list_del(&cmdbuf->pool_link);
         list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);

         cmdbuf->level = pAllocateInfo->level;
         vk_command_buffer_finish(&cmdbuf->vk);
         result = vk_command_buffer_init(&cmdbuf->vk, &device->vk);
      } else {
         result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level,
                                      &cmdbuf);
      }

      if (result != VK_SUCCESS)
         goto err_free_cmd_bufs;

      pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf);
   }

   return VK_SUCCESS;

err_free_cmd_bufs:
   panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool,
                                      i, pCommandBuffers);
   for (unsigned j = 0; j < i; j++)
      pCommandBuffers[j] = VK_NULL_HANDLE;

   return result;
}

void
panvk_per_arch(FreeCommandBuffers)(VkDevice device,
                                   VkCommandPool commandPool,
                                   uint32_t commandBufferCount,
                                   const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]);

      if (cmdbuf) {
         if (cmdbuf->pool) {
            list_del(&cmdbuf->pool_link);
            panvk_reset_cmdbuf(cmdbuf);
            list_addtail(&cmdbuf->pool_link,
                         &cmdbuf->pool->free_cmd_buffers);
         } else {
            panvk_destroy_cmdbuf(cmdbuf);
         }
      }
   }
}

VkResult
panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer,
                                   VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   return panvk_reset_cmdbuf(cmdbuf);
}
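/* Beginning a command buffer implicitly resets it unless it is still in the
 * initial state; an explicit vkResetCommandBuffer will already have put it
 * back in PANVK_CMD_BUFFER_STATUS_INITIAL, in which case the reset is
 * skipped.
 */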
VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, no need to do it again.
       */
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device,
                                   VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->active_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_object_free(&device->vk, pAllocator, pool);
}

VkResult
panvk_per_arch(ResetCommandPool)(VkDevice device,
                                 VkCommandPool commandPool,
                                 VkCommandPoolResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
   VkResult result;

   list_for_each_entry(struct panvk_cmd_buffer, cmdbuf,
                       &pool->active_cmd_buffers, pool_link) {
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
panvk_per_arch(TrimCommandPool)(VkDevice device,
                                VkCommandPool commandPool,
                                VkCommandPoolTrimFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);
}