/* * Copyright (C) 2021 Collabora, Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Authors: * Alyssa Rosenzweig * Boris Brezillon */ #include "util/macros.h" #include "panfrost-quirks.h" #include "pan_cs.h" #include "pan_encoder.h" #include "pan_texture.h" static unsigned mod_to_block_fmt(uint64_t mod) { switch (mod) { case DRM_FORMAT_MOD_LINEAR: return MALI_BLOCK_FORMAT_LINEAR; case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; default: #if PAN_ARCH >= 5 if (drm_is_afbc(mod)) return MALI_BLOCK_FORMAT_AFBC; #endif unreachable("Unsupported modifer"); } } static enum mali_msaa mali_sampling_mode(const struct pan_image_view *view) { if (view->image->layout.nr_samples > 1) { assert(view->nr_samples == view->image->layout.nr_samples); assert(view->image->layout.slices[0].surface_stride != 0); return MALI_MSAA_LAYERED; } if (view->nr_samples > view->image->layout.nr_samples) { assert(view->image->layout.nr_samples == 1); return MALI_MSAA_AVERAGE; } assert(view->nr_samples == view->image->layout.nr_samples); assert(view->nr_samples == 1); return MALI_MSAA_SINGLE; } static inline enum mali_sample_pattern pan_sample_pattern(unsigned samples) { switch (samples) { case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID; case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID; default: unreachable("Unsupported sample count"); } } int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb) { #if PAN_ARCH <= 6 if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard && fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) return 0; return -1; #else bool best_rt_valid = false; int best_rt = -1; for (unsigned i = 0; i < fb->rt_count; i++) { if (!fb->rts[i].view || fb->rts[0].discard || fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE) continue; bool valid = *(fb->rts[i].crc_valid); bool full = !fb->extent.minx && !fb->extent.miny && fb->extent.maxx == (fb->width - 1) && fb->extent.maxy == (fb->height - 1); if (!full && !valid) continue; if (best_rt < 0 || (valid && !best_rt_valid)) { best_rt = i; best_rt_valid = valid; } if (valid) break; } return best_rt; #endif } static enum mali_zs_format translate_zs_format(enum pipe_format in) { switch (in) { case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16; case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8; case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8; case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24; default: unreachable("Unsupported depth/stencil format."); } } #if PAN_ARCH >= 5 static enum mali_s_format translate_s_format(enum pipe_format in) { switch (in) { case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8; case PIPE_FORMAT_S8_UINT_Z24_UNORM: case PIPE_FORMAT_S8X24_UINT: return MALI_S_FORMAT_S8X24; case PIPE_FORMAT_Z24_UNORM_S8_UINT: case PIPE_FORMAT_X24S8_UINT: return MALI_S_FORMAT_X24S8; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_S_FORMAT_X32_S8X24; default: unreachable("Unsupported stencil format."); } } static void pan_prepare_s(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext) { const struct pan_image_view *s = fb->zs.view.s; if (!s) return; unsigned level = s->first_level; ext->s_msaa = mali_sampling_mode(s); struct pan_surface surf; pan_iview_get_surface(s, 0, 0, 0, &surf); assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); ext->s_writeback_base = surf.data; ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride; ext->s_writeback_surface_stride = (s->image->layout.nr_samples > 1) ? s->image->layout.slices[level].surface_stride : 0; ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier); ext->s_write_format = translate_s_format(s->format); } static void pan_prepare_zs(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext) { const struct pan_image_view *zs = fb->zs.view.zs; if (!zs) return; unsigned level = zs->first_level; ext->zs_msaa = mali_sampling_mode(zs); struct pan_surface surf; pan_iview_get_surface(zs, 0, 0, 0, &surf); if (drm_is_afbc(zs->image->layout.modifier)) { #if PAN_ARCH >= 6 const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level]; ext->zs_afbc_row_stride = slice->afbc.row_stride / AFBC_HEADER_BYTES_PER_TILE; #else ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC; ext->zs_afbc_body_size = 0x1000; ext->zs_afbc_chunk_size = 9; ext->zs_afbc_sparse = true; #endif ext->zs_afbc_header = surf.afbc.header; ext->zs_afbc_body = surf.afbc.body; } else { assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); /* TODO: Z32F(S8) support, which is always linear */ ext->zs_writeback_base = surf.data; ext->zs_writeback_row_stride = zs->image->layout.slices[level].row_stride; ext->zs_writeback_surface_stride = (zs->image->layout.nr_samples > 1) ? zs->image->layout.slices[level].surface_stride : 0; } ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); ext->zs_write_format = translate_zs_format(zs->format); if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8) ext->s_writeback_base = ext->zs_writeback_base; } static void pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, struct MALI_ZS_CRC_EXTENSION *ext) { if (rt_crc < 0) return; assert(rt_crc < fb->rt_count); const struct pan_image_view *rt = fb->rts[rt_crc].view; const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level]; ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ? (rt->image->data.bo->ptr.gpu + rt->image->data.offset) : (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) + slice->crc.offset; ext->crc_row_stride = slice->crc.stride; #if PAN_ARCH >= 7 ext->crc_render_target = rt_crc; if (fb->rts[rt_crc].clear) { uint32_t clear_val = fb->rts[rt_crc].clear_value[0]; ext->crc_clear_color = clear_val | 0xc000000000000000 | (((uint64_t)clear_val & 0xffff) << 32); } #endif } static void pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, void *zs_crc_ext) { pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) { pan_prepare_crc(fb, rt_crc, &cfg); cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s; pan_prepare_zs(fb, &cfg); pan_prepare_s(fb, &cfg); } } /* Measure format as it appears in the tile buffer */ static unsigned pan_bytes_per_pixel_tib(enum pipe_format format) { if (panfrost_blendable_formats_v7[format].internal) { /* Blendable formats are always 32-bits in the tile buffer, * extra bits are used as padding or to dither */ return 4; } else { /* Non-blendable formats are raw, rounded up to the nearest * power-of-two size */ unsigned bytes = util_format_get_blocksize(format); return util_next_power_of_two(bytes); } } static unsigned pan_internal_cbuf_size(const struct pan_fb_info *fb, unsigned *tile_size) { unsigned total_size = 0; *tile_size = 16 * 16; for (int cb = 0; cb < fb->rt_count; ++cb) { const struct pan_image_view *rt = fb->rts[cb].view; if (!rt) continue; total_size += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples * (*tile_size); } /* We have a 4KB budget, let's reduce the tile size until it fits. */ while (total_size > 4096) { total_size >>= 1; *tile_size >>= 1; } /* Align on 1k. */ total_size = ALIGN_POT(total_size, 1024); /* Minimum tile size is 4x4. */ assert(*tile_size >= 4 * 4); return total_size; } static enum mali_color_format pan_mfbd_raw_format(unsigned bits) { switch (bits) { case 8: return MALI_COLOR_FORMAT_RAW8; case 16: return MALI_COLOR_FORMAT_RAW16; case 24: return MALI_COLOR_FORMAT_RAW24; case 32: return MALI_COLOR_FORMAT_RAW32; case 48: return MALI_COLOR_FORMAT_RAW48; case 64: return MALI_COLOR_FORMAT_RAW64; case 96: return MALI_COLOR_FORMAT_RAW96; case 128: return MALI_COLOR_FORMAT_RAW128; case 192: return MALI_COLOR_FORMAT_RAW192; case 256: return MALI_COLOR_FORMAT_RAW256; case 384: return MALI_COLOR_FORMAT_RAW384; case 512: return MALI_COLOR_FORMAT_RAW512; case 768: return MALI_COLOR_FORMAT_RAW768; case 1024: return MALI_COLOR_FORMAT_RAW1024; case 1536: return MALI_COLOR_FORMAT_RAW1536; case 2048: return MALI_COLOR_FORMAT_RAW2048; default: unreachable("invalid raw bpp"); } } static void pan_rt_init_format(const struct pan_image_view *rt, struct MALI_RENDER_TARGET *cfg) { /* Explode details on the format */ const struct util_format_description *desc = util_format_description(rt->format); /* The swizzle for rendering is inverted from texturing */ unsigned char swizzle[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, }; /* Fill in accordingly, defaulting to 8-bit UNORM */ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) cfg->srgb = true; struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; if (fmt.internal) { cfg->internal_format = fmt.internal; cfg->writeback_format = fmt.writeback; panfrost_invert_swizzle(desc->swizzle, swizzle); } else { /* Construct RAW internal/writeback, where internal is * specified logarithmically (round to next power-of-two). * Offset specified from RAW8, where 8 = 2^3 */ unsigned bits = desc->block.bits; unsigned offset = util_logbase2_ceil(bits) - 3; assert(offset <= 4); cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset; cfg->writeback_format = pan_mfbd_raw_format(bits); } cfg->swizzle = panfrost_translate_swizzle_4(swizzle); } static void pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx, unsigned cbuf_offset, struct MALI_RENDER_TARGET *cfg) { cfg->clean_pixel_write_enable = fb->rts[idx].clear; cfg->internal_buffer_offset = cbuf_offset; if (fb->rts[idx].clear) { cfg->clear.color_0 = fb->rts[idx].clear_value[0]; cfg->clear.color_1 = fb->rts[idx].clear_value[1]; cfg->clear.color_2 = fb->rts[idx].clear_value[2]; cfg->clear.color_3 = fb->rts[idx].clear_value[3]; } const struct pan_image_view *rt = fb->rts[idx].view; if (!rt || fb->rts[idx].discard) { cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; cfg->internal_buffer_offset = cbuf_offset; #if PAN_ARCH >= 7 cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; cfg->dithering_enable = true; #endif return; } cfg->write_enable = true; cfg->dithering_enable = true; unsigned level = rt->first_level; assert(rt->last_level == rt->first_level); assert(rt->last_layer == rt->first_layer); int row_stride = rt->image->layout.slices[level].row_stride; /* Only set layer_stride for layered MSAA rendering */ unsigned layer_stride = (rt->image->layout.nr_samples > 1) ? rt->image->layout.slices[level].surface_stride : 0; cfg->writeback_msaa = mali_sampling_mode(rt); pan_rt_init_format(rt, cfg); #if PAN_ARCH <= 5 cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); #else cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); #endif struct pan_surface surf; pan_iview_get_surface(rt, 0, 0, 0, &surf); if (drm_is_afbc(rt->image->layout.modifier)) { const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level]; #if PAN_ARCH >= 6 cfg->afbc.row_stride = slice->afbc.row_stride / AFBC_HEADER_BYTES_PER_TILE; cfg->afbc.afbc_wide_block_enable = panfrost_block_dim(rt->image->layout.modifier, true, 0) > 16; #else cfg->afbc.chunk_size = 9; cfg->afbc.sparse = true; cfg->afbc.body_size = slice->afbc.body_size; #endif cfg->afbc.header = surf.afbc.header; cfg->afbc.body = surf.afbc.body; if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) cfg->afbc.yuv_transform_enable = true; } else { assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR || rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); cfg->rgb.base = surf.data; cfg->rgb.row_stride = row_stride; cfg->rgb.surface_stride = layer_stride; } } #endif void GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out) { pan_pack(out, LOCAL_STORAGE, cfg) { if (info->tls.size) { unsigned shift = panfrost_get_stack_shift(info->tls.size); cfg.tls_size = shift; cfg.tls_base_pointer = info->tls.ptr; } if (info->wls.size) { assert(!(info->wls.ptr & 4095)); assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL)); cfg.wls_base_pointer = info->wls.ptr; unsigned wls_size = pan_wls_adjust_size(info->wls.size); cfg.wls_instances = pan_wls_instances(&info->wls.dim); cfg.wls_size_scale = util_logbase2(wls_size) + 1; } else { cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; } } } #if PAN_ARCH <= 5 static void pan_emit_midgard_tiler(const struct panfrost_device *dev, const struct pan_fb_info *fb, const struct pan_tiler_context *tiler_ctx, void *out) { bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING); assert(tiler_ctx->midgard.polygon_list->ptr.gpu); pan_pack(out, TILER_CONTEXT, cfg) { unsigned header_size; if (tiler_ctx->midgard.disable) { cfg.hierarchy_mask = hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER; header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4); cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu; cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu; } else { cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(fb->width, fb->height, 1, hierarchy); header_size = panfrost_tiler_header_size(fb->width, fb->height, cfg.hierarchy_mask, hierarchy); cfg.polygon_list_size = panfrost_tiler_full_size(fb->width, fb->height, cfg.hierarchy_mask, hierarchy); cfg.heap_start = dev->tiler_heap->ptr.gpu; cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; } cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu; cfg.polygon_list_body = cfg.polygon_list + header_size; } } #endif #if PAN_ARCH >= 5 static void pan_emit_rt(const struct pan_fb_info *fb, unsigned idx, unsigned cbuf_offset, void *out) { pan_pack(out, RENDER_TARGET, cfg) { pan_prepare_rt(fb, idx, cbuf_offset, &cfg); } } #if PAN_ARCH >= 6 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033: * * Forcing clean_tile_writes breaks INTERSECT readbacks * * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if * clean tile writes is forced. Since INTERSECT is a hint that the hardware may * ignore, this cannot affect correctness, only performance */ static enum mali_pre_post_frame_shader_mode pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile) { if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT) return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS; else return mode; } /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if * the effective tile size differs from the superblock size of any enabled AFBC * render target. Check this condition. */ static bool pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size) { if (!drm_is_afbc(rt->image->layout.modifier)) return false; unsigned superblock = panfrost_block_dim(rt->image->layout.modifier, true, 0); assert(superblock >= 16); assert(tile_size <= 16*16); /* Tile size and superblock differ unless they are both 16x16 */ return !(superblock == 16 && tile_size == 16*16); } static bool pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size) { /* Maximum tile size */ assert(tile_size <= 16*16); for (unsigned i = 0; i < fb->rt_count; ++i) { if (fb->rts[i].view && !fb->rts[i].discard && pan_force_clean_write_rt(fb->rts[i].view, tile_size)) return true; } if (fb->zs.view.zs && !fb->zs.discard.z && pan_force_clean_write_rt(fb->zs.view.zs, tile_size)) return true; if (fb->zs.view.s && !fb->zs.discard.s && pan_force_clean_write_rt(fb->zs.view.s, tile_size)) return true; return false; } #endif static unsigned pan_emit_mfbd(const struct panfrost_device *dev, const struct pan_fb_info *fb, const struct pan_tls_info *tls, const struct pan_tiler_context *tiler_ctx, void *out) { unsigned tags = MALI_FBD_TAG_IS_MFBD; void *fbd = out; void *rtd = out + pan_size(FRAMEBUFFER); #if PAN_ARCH <= 5 GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); #endif unsigned tile_size; unsigned internal_cbuf_size = pan_internal_cbuf_size(fb, &tile_size); int crc_rt = GENX(pan_select_crc_rt)(fb); bool has_zs_crc_ext = pan_fbd_has_zs_crc_ext(fb); pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { #if PAN_ARCH >= 6 bool force_clean_write = pan_force_clean_write(fb, tile_size); cfg.sample_locations = panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples)); cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write); cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write); cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write); cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu; cfg.tiler = tiler_ctx->bifrost; #endif cfg.width = fb->width; cfg.height = fb->height; cfg.bound_max_x = fb->width - 1; cfg.bound_max_y = fb->height - 1; cfg.effective_tile_size = tile_size; cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; cfg.render_target_count = MAX2(fb->rt_count, 1); /* Default to 24 bit depth if there's no surface. */ cfg.z_internal_format = fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format) : MALI_Z_INTERNAL_FORMAT_D24; cfg.z_clear = fb->zs.clear_value.depth; cfg.s_clear = fb->zs.clear_value.stencil; cfg.color_buffer_allocation = internal_cbuf_size; cfg.sample_count = fb->nr_samples; cfg.sample_pattern = pan_sample_pattern(fb->nr_samples); cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z); cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); cfg.has_zs_crc_extension = has_zs_crc_ext; if (crc_rt >= 0) { bool *valid = fb->rts[crc_rt].crc_valid; bool full = !fb->extent.minx && !fb->extent.miny && fb->extent.maxx == (fb->width - 1) && fb->extent.maxy == (fb->height - 1); cfg.crc_read_enable = *valid; /* If the data is currently invalid, still write CRC * data if we are doing a full write, so that it is * valid for next time. */ cfg.crc_write_enable = *valid || full; *valid |= full; } } #if PAN_ARCH >= 6 pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding); #else pan_emit_midgard_tiler(dev, fb, tiler_ctx, pan_section_ptr(fbd, FRAMEBUFFER, TILER)); /* All weights set to 0, nothing to do here */ pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); #endif if (has_zs_crc_ext) { pan_emit_zs_crc_ext(fb, crc_rt, out + pan_size(FRAMEBUFFER)); rtd += pan_size(ZS_CRC_EXTENSION); tags |= MALI_FBD_TAG_HAS_ZS_RT; } unsigned rt_count = MAX2(fb->rt_count, 1); unsigned cbuf_offset = 0; for (unsigned i = 0; i < rt_count; i++) { pan_emit_rt(fb, i, cbuf_offset, rtd); rtd += pan_size(RENDER_TARGET); if (!fb->rts[i].view) continue; cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * tile_size * fb->rts[i].view->image->layout.nr_samples; if (i != crc_rt) *(fb->rts[i].crc_valid) = false; } tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2; return tags; } #else /* PAN_ARCH == 4 */ static void pan_emit_sfbd_tiler(const struct panfrost_device *dev, const struct pan_fb_info *fb, const struct pan_tiler_context *ctx, void *fbd) { pan_emit_midgard_tiler(dev, fb, ctx, pan_section_ptr(fbd, FRAMEBUFFER, TILER)); /* All weights set to 0, nothing to do here */ pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding); pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); } static void pan_emit_sfbd(const struct panfrost_device *dev, const struct pan_fb_info *fb, const struct pan_tls_info *tls, const struct pan_tiler_context *tiler_ctx, void *fbd) { GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { cfg.bound_max_x = fb->width - 1; cfg.bound_max_y = fb->height - 1; cfg.dithering_enable = true; cfg.clean_pixel_write_enable = true; cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; if (fb->rts[0].clear) { cfg.clear_color_0 = fb->rts[0].clear_value[0]; cfg.clear_color_1 = fb->rts[0].clear_value[1]; cfg.clear_color_2 = fb->rts[0].clear_value[2]; cfg.clear_color_3 = fb->rts[0].clear_value[3]; } if (fb->zs.clear.z) cfg.z_clear = fb->zs.clear_value.depth; if (fb->zs.clear.s) cfg.s_clear = fb->zs.clear_value.stencil; if (fb->rt_count && fb->rts[0].view) { const struct pan_image_view *rt = fb->rts[0].view; const struct util_format_description *desc = util_format_description(rt->format); /* The swizzle for rendering is inverted from texturing */ unsigned char swizzle[4]; panfrost_invert_swizzle(desc->swizzle, swizzle); cfg.swizzle = panfrost_translate_swizzle_4(swizzle); struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; if (fmt.internal) { cfg.internal_format = fmt.internal; cfg.color_writeback_format = fmt.writeback; } else { unreachable("raw formats not finished for SFBD"); } unsigned level = rt->first_level; struct pan_surface surf; pan_iview_get_surface(rt, 0, 0, 0, &surf); cfg.color_write_enable = !fb->rts[0].discard; cfg.color_writeback.base = surf.data; cfg.color_writeback.row_stride = rt->image->layout.slices[level].row_stride; cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier); assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR || cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) { const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level]; cfg.crc_buffer.row_stride = slice->crc.stride; if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) { cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu + rt->image->data.offset + slice->crc.offset; } else { cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu + rt->image->crc.offset + slice->crc.offset; } } } if (fb->zs.view.zs) { const struct pan_image_view *zs = fb->zs.view.zs; unsigned level = zs->first_level; struct pan_surface surf; pan_iview_get_surface(zs, 0, 0, 0, &surf); cfg.zs_write_enable = !fb->zs.discard.z; cfg.zs_writeback.base = surf.data; cfg.zs_writeback.row_stride = zs->image->layout.slices[level].row_stride; cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR || cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); cfg.zs_format = translate_zs_format(zs->format); } cfg.sample_count = fb->nr_samples; if (fb->rt_count) cfg.msaa = mali_sampling_mode(fb->rts[0].view); } pan_emit_sfbd_tiler(dev, fb, tiler_ctx, fbd); pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding); } #endif unsigned GENX(pan_emit_fbd)(const struct panfrost_device *dev, const struct pan_fb_info *fb, const struct pan_tls_info *tls, const struct pan_tiler_context *tiler_ctx, void *out) { #if PAN_ARCH == 4 assert(fb->rt_count <= 1); pan_emit_sfbd(dev, fb, tls, tiler_ctx, out); return 0; #else return pan_emit_mfbd(dev, fb, tls, tiler_ctx, out); #endif } #if PAN_ARCH >= 6 void GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, void *out) { pan_pack(out, TILER_HEAP, heap) { heap.size = dev->tiler_heap->size; heap.base = dev->tiler_heap->ptr.gpu; heap.bottom = dev->tiler_heap->ptr.gpu; heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; } } void GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, unsigned fb_width, unsigned fb_height, unsigned nr_samples, mali_ptr heap, void *out) { unsigned max_levels = dev->tiler_features.max_levels; assert(max_levels >= 2); pan_pack(out, TILER_CONTEXT, tiler) { /* TODO: Select hierarchy mask more effectively */ tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28; tiler.fb_width = fb_width; tiler.fb_height = fb_height; tiler.heap = heap; tiler.sample_pattern = pan_sample_pattern(nr_samples); } } #endif void GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, mali_ptr fbd, void *out) { pan_section_pack(out, FRAGMENT_JOB, HEADER, header) { header.type = MALI_JOB_TYPE_FRAGMENT; header.index = 1; } pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) { payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT; payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT; payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT; payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT; payload.framebuffer = fbd; #if PAN_ARCH >= 5 if (fb->tile_map.base) { payload.has_tile_enable_map = true; payload.tile_enable_map = fb->tile_map.base; payload.tile_enable_map_row_stride = fb->tile_map.stride; } #endif } }