/* * Copyright 2015 Advanced Micro Devices, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "ac_gpu_info.h" #include "ac_perfcounter.h" #include "util/u_memory.h" #include "macros.h" /* cik_CB */ static unsigned cik_CB_select0[] = { R_037004_CB_PERFCOUNTER0_SELECT, R_03700C_CB_PERFCOUNTER1_SELECT, R_037010_CB_PERFCOUNTER2_SELECT, R_037014_CB_PERFCOUNTER3_SELECT, }; static unsigned cik_CB_select1[] = { R_037008_CB_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_CB = { .gpu_block = CB, .name = "CB", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = cik_CB_select0, .select1 = cik_CB_select1, .counter0_lo = R_035018_CB_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x0, }; /* cik_CPC */ static unsigned cik_CPC_select0[] = { R_036024_CPC_PERFCOUNTER0_SELECT, R_03600C_CPC_PERFCOUNTER1_SELECT, }; static unsigned cik_CPC_select1[] = { R_036010_CPC_PERFCOUNTER0_SELECT1, }; static unsigned cik_CPC_counters[] = { R_034018_CPC_PERFCOUNTER0_LO, R_034010_CPC_PERFCOUNTER1_LO, }; static struct ac_pc_block_base cik_CPC = { .gpu_block = CPC, .name = "CPC", .num_counters = 2, .select0 = cik_CPC_select0, .select1 = cik_CPC_select1, .counters = cik_CPC_counters, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x1, }; /* cik_CPF */ static unsigned cik_CPF_select0[] = { R_03601C_CPF_PERFCOUNTER0_SELECT, R_036014_CPF_PERFCOUNTER1_SELECT, }; static unsigned cik_CPF_select1[] = { R_036018_CPF_PERFCOUNTER0_SELECT1, }; static unsigned cik_CPF_counters[] = { R_034028_CPF_PERFCOUNTER0_LO, R_034020_CPF_PERFCOUNTER1_LO, }; static struct ac_pc_block_base cik_CPF = { .gpu_block = CPF, .name = "CPF", .num_counters = 2, .select0 = cik_CPF_select0, .select1 = cik_CPF_select1, .counters = cik_CPF_counters, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x2, }; /* cik_CPG */ static unsigned cik_CPG_select0[] = { R_036008_CPG_PERFCOUNTER0_SELECT, R_036000_CPG_PERFCOUNTER1_SELECT, }; static unsigned cik_CPG_select1[] = { R_036004_CPG_PERFCOUNTER0_SELECT1 }; static unsigned cik_CPG_counters[] = { R_034008_CPG_PERFCOUNTER0_LO, R_034000_CPG_PERFCOUNTER1_LO, }; static struct ac_pc_block_base cik_CPG = { .gpu_block = CPG, .name = "CPG", .num_counters = 2, .select0 = cik_CPG_select0, .select1 = cik_CPG_select1, .counters = cik_CPG_counters, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x0, }; /* cik_DB */ static unsigned cik_DB_select0[] = { R_037100_DB_PERFCOUNTER0_SELECT, R_037108_DB_PERFCOUNTER1_SELECT, R_037110_DB_PERFCOUNTER2_SELECT, R_037118_DB_PERFCOUNTER3_SELECT, }; static unsigned cik_DB_select1[] = { R_037104_DB_PERFCOUNTER0_SELECT1, R_03710C_DB_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_DB = { .gpu_block = DB, .name = "DB", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = cik_DB_select0, .select1 = cik_DB_select1, .counter0_lo = R_035100_DB_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 3, .spm_block_select = 0x1, }; /* cik_GDS */ static unsigned cik_GDS_select0[] = { R_036A00_GDS_PERFCOUNTER0_SELECT, R_036A04_GDS_PERFCOUNTER1_SELECT, R_036A08_GDS_PERFCOUNTER2_SELECT, R_036A0C_GDS_PERFCOUNTER3_SELECT, }; static unsigned cik_GDS_select1[] = { R_036A10_GDS_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_GDS = { .gpu_block = GDS, .name = "GDS", .num_counters = 4, .select0 = cik_GDS_select0, .select1 = cik_GDS_select1, .counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x3, }; /* cik_GRBM */ static unsigned cik_GRBM_select0[] = { R_036100_GRBM_PERFCOUNTER0_SELECT, R_036104_GRBM_PERFCOUNTER1_SELECT, }; static unsigned cik_GRBM_counters[] = { R_034100_GRBM_PERFCOUNTER0_LO, R_03410C_GRBM_PERFCOUNTER1_LO, }; static struct ac_pc_block_base cik_GRBM = { .gpu_block = GRBM, .name = "GRBM", .num_counters = 2, .select0 = cik_GRBM_select0, .counters = cik_GRBM_counters, }; /* cik_GRBMSE */ static unsigned cik_GRBMSE_select0[] = { R_036108_GRBM_SE0_PERFCOUNTER_SELECT, R_03610C_GRBM_SE1_PERFCOUNTER_SELECT, R_036110_GRBM_SE2_PERFCOUNTER_SELECT, R_036114_GRBM_SE3_PERFCOUNTER_SELECT, }; static struct ac_pc_block_base cik_GRBMSE = { .gpu_block = GRBMSE, .name = "GRBMSE", .num_counters = 4, .select0 = cik_GRBMSE_select0, .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO, }; /* cik_IA */ static unsigned cik_IA_select0[] = { R_036210_IA_PERFCOUNTER0_SELECT, R_036214_IA_PERFCOUNTER1_SELECT, R_036218_IA_PERFCOUNTER2_SELECT, R_03621C_IA_PERFCOUNTER3_SELECT, }; static unsigned cik_IA_select1[] = { R_036220_IA_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_IA = { .gpu_block = IA, .name = "IA", .num_counters = 4, .select0 = cik_IA_select0, .select1 = cik_IA_select1, .counter0_lo = R_034220_IA_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x6, }; /* cik_PA_SC */ static unsigned cik_PA_SC_select0[] = { R_036500_PA_SC_PERFCOUNTER0_SELECT, R_036508_PA_SC_PERFCOUNTER1_SELECT, R_03650C_PA_SC_PERFCOUNTER2_SELECT, R_036510_PA_SC_PERFCOUNTER3_SELECT, R_036514_PA_SC_PERFCOUNTER4_SELECT, R_036518_PA_SC_PERFCOUNTER5_SELECT, R_03651C_PA_SC_PERFCOUNTER6_SELECT, R_036520_PA_SC_PERFCOUNTER7_SELECT, }; static unsigned cik_PA_SC_select1[] = { R_036504_PA_SC_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_PA_SC = { .gpu_block = PA_SC, .name = "PA_SC", .num_counters = 8, .flags = AC_PC_BLOCK_SE, .select0 = cik_PA_SC_select0, .select1 = cik_PA_SC_select1, .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x4, }; /* cik_PA_SU */ static unsigned cik_PA_SU_select0[] = { R_036400_PA_SU_PERFCOUNTER0_SELECT, R_036408_PA_SU_PERFCOUNTER1_SELECT, R_036410_PA_SU_PERFCOUNTER2_SELECT, R_036414_PA_SU_PERFCOUNTER3_SELECT, }; static unsigned cik_PA_SU_select1[] = { R_036404_PA_SU_PERFCOUNTER0_SELECT1, R_03640C_PA_SU_PERFCOUNTER1_SELECT1, }; /* According to docs, PA_SU counters are only 48 bits wide. */ static struct ac_pc_block_base cik_PA_SU = { .gpu_block = PA_SU, .name = "PA_SU", .num_counters = 4, .flags = AC_PC_BLOCK_SE, .select0 = cik_PA_SU_select0, .select1 = cik_PA_SU_select1, .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 3, .spm_block_select = 0x2, }; /* cik_SPI */ static unsigned cik_SPI_select0[] = { R_036600_SPI_PERFCOUNTER0_SELECT, R_036604_SPI_PERFCOUNTER1_SELECT, R_036608_SPI_PERFCOUNTER2_SELECT, R_03660C_SPI_PERFCOUNTER3_SELECT, R_036620_SPI_PERFCOUNTER4_SELECT, R_036624_SPI_PERFCOUNTER5_SELECT, }; static unsigned cik_SPI_select1[] = { R_036610_SPI_PERFCOUNTER0_SELECT1, R_036614_SPI_PERFCOUNTER1_SELECT1, R_036618_SPI_PERFCOUNTER2_SELECT1, R_03661C_SPI_PERFCOUNTER3_SELECT1 }; static struct ac_pc_block_base cik_SPI = { .gpu_block = SPI, .name = "SPI", .num_counters = 6, .flags = AC_PC_BLOCK_SE, .select0 = cik_SPI_select0, .select1 = cik_SPI_select1, .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO, .num_spm_counters = 4, .num_spm_wires = 8, .spm_block_select = 0x8, }; /* cik_SQ */ static unsigned cik_SQ_select0[] = { R_036700_SQ_PERFCOUNTER0_SELECT, R_036704_SQ_PERFCOUNTER1_SELECT, R_036708_SQ_PERFCOUNTER2_SELECT, R_03670C_SQ_PERFCOUNTER3_SELECT, R_036710_SQ_PERFCOUNTER4_SELECT, R_036714_SQ_PERFCOUNTER5_SELECT, R_036718_SQ_PERFCOUNTER6_SELECT, R_03671C_SQ_PERFCOUNTER7_SELECT, R_036720_SQ_PERFCOUNTER8_SELECT, R_036724_SQ_PERFCOUNTER9_SELECT, R_036728_SQ_PERFCOUNTER10_SELECT, R_03672C_SQ_PERFCOUNTER11_SELECT, R_036730_SQ_PERFCOUNTER12_SELECT, R_036734_SQ_PERFCOUNTER13_SELECT, R_036738_SQ_PERFCOUNTER14_SELECT, R_03673C_SQ_PERFCOUNTER15_SELECT, }; static struct ac_pc_block_base cik_SQ = { .gpu_block = SQ, .name = "SQ", .num_counters = 16, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER, .select0 = cik_SQ_select0, .select_or = S_036700_SQC_BANK_MASK(15) | S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15), .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO, .num_spm_wires = 8, .spm_block_select = 0x9, }; /* cik_SX */ static unsigned cik_SX_select0[] = { R_036900_SX_PERFCOUNTER0_SELECT, R_036904_SX_PERFCOUNTER1_SELECT, R_036908_SX_PERFCOUNTER2_SELECT, R_03690C_SX_PERFCOUNTER3_SELECT, }; static unsigned cik_SX_select1[] = { R_036910_SX_PERFCOUNTER0_SELECT1, R_036914_SX_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_SX = { .gpu_block = SX, .name = "SX", .num_counters = 4, .flags = AC_PC_BLOCK_SE, .select0 = cik_SX_select0, .select1 = cik_SX_select1, .counter0_lo = R_034900_SX_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x3, }; /* cik_TA */ static unsigned cik_TA_select0[] = { R_036B00_TA_PERFCOUNTER0_SELECT, R_036B08_TA_PERFCOUNTER1_SELECT, }; static unsigned cik_TA_select1[] = { R_036B04_TA_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_TA = { .gpu_block = TA, .name = "TA", .num_counters = 2, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = cik_TA_select0, .select1 = cik_TA_select1, .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x5, }; /* cik_TD */ static unsigned cik_TD_select0[] = { R_036C00_TD_PERFCOUNTER0_SELECT, R_036C08_TD_PERFCOUNTER1_SELECT, }; static unsigned cik_TD_select1[] = { R_036C04_TD_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base cik_TD = { .gpu_block = TD, .name = "TD", .num_counters = 2, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = cik_TD_select0, .select1 = cik_TD_select1, .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x6, }; /* cik_TCA */ static unsigned cik_TCA_select0[] = { R_036E40_TCA_PERFCOUNTER0_SELECT, R_036E48_TCA_PERFCOUNTER1_SELECT, R_036E50_TCA_PERFCOUNTER2_SELECT, R_036E54_TCA_PERFCOUNTER3_SELECT, }; static unsigned cik_TCA_select1[] = { R_036E44_TCA_PERFCOUNTER0_SELECT1, R_036E4C_TCA_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_TCA = { .gpu_block = TCA, .name = "TCA", .num_counters = 4, .flags = AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = cik_TCA_select0, .select1 = cik_TCA_select1, .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x5, }; /* cik_TCC */ static unsigned cik_TCC_select0[] = { R_036E00_TCC_PERFCOUNTER0_SELECT, R_036E08_TCC_PERFCOUNTER1_SELECT, R_036E10_TCC_PERFCOUNTER2_SELECT, R_036E14_TCC_PERFCOUNTER3_SELECT, }; static unsigned cik_TCC_select1[] = { R_036E04_TCC_PERFCOUNTER0_SELECT1, R_036E0C_TCC_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_TCC = { .gpu_block = TCC, .name = "TCC", .num_counters = 4, .flags = AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = cik_TCC_select0, .select1 = cik_TCC_select1, .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x4, }; /* cik_TCP */ static unsigned cik_TCP_select0[] = { R_036D00_TCP_PERFCOUNTER0_SELECT, R_036D08_TCP_PERFCOUNTER1_SELECT, R_036D10_TCP_PERFCOUNTER2_SELECT, R_036D14_TCP_PERFCOUNTER3_SELECT, }; static unsigned cik_TCP_select1[] = { R_036D04_TCP_PERFCOUNTER0_SELECT1, R_036D0C_TCP_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_TCP = { .gpu_block = TCP, .name = "TCP", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = cik_TCP_select0, .select1 = cik_TCP_select1, .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 3, .spm_block_select = 0x7, }; /* cik_VGT */ static unsigned cik_VGT_select0[] = { R_036230_VGT_PERFCOUNTER0_SELECT, R_036234_VGT_PERFCOUNTER1_SELECT, R_036238_VGT_PERFCOUNTER2_SELECT, R_03623C_VGT_PERFCOUNTER3_SELECT, }; static unsigned cik_VGT_select1[] = { R_036240_VGT_PERFCOUNTER0_SELECT1, R_036244_VGT_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base cik_VGT = { .gpu_block = VGT, .name = "VGT", .num_counters = 4, .flags = AC_PC_BLOCK_SE, .select0 = cik_VGT_select0, .select1 = cik_VGT_select1, .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 3, .spm_block_select = 0xa, }; /* cik_WD */ static unsigned cik_WD_select0[] = { R_036200_WD_PERFCOUNTER0_SELECT, R_036204_WD_PERFCOUNTER1_SELECT, R_036208_WD_PERFCOUNTER2_SELECT, R_03620C_WD_PERFCOUNTER3_SELECT, }; static struct ac_pc_block_base cik_WD = { .gpu_block = WD, .name = "WD", .num_counters = 4, .select0 = cik_WD_select0, .counter0_lo = R_034200_WD_PERFCOUNTER0_LO, }; /* cik_MC */ static struct ac_pc_block_base cik_MC = { .gpu_block = MC, .name = "MC", .num_counters = 4, }; /* cik_SRBM */ static struct ac_pc_block_base cik_SRBM = { .gpu_block = SRBM, .name = "SRBM", .num_counters = 2, }; /* gfx10_CHA */ static unsigned gfx10_CHA_select0[] = { R_037780_CHA_PERFCOUNTER0_SELECT, R_037788_CHA_PERFCOUNTER1_SELECT, R_03778C_CHA_PERFCOUNTER2_SELECT, R_037790_CHA_PERFCOUNTER3_SELECT, }; static unsigned gfx10_CHA_select1[] = { R_037784_CHA_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_CHA = { .gpu_block = CHA, .name = "CHA", .num_counters = 4, .select0 = gfx10_CHA_select0, .select1 = gfx10_CHA_select1, .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0xc, }; /* gfx10_CHCG */ static unsigned gfx10_CHCG_select0[] = { R_036F18_CHCG_PERFCOUNTER0_SELECT, R_036F20_CHCG_PERFCOUNTER1_SELECT, R_036F24_CHCG_PERFCOUNTER2_SELECT, R_036F28_CHCG_PERFCOUNTER3_SELECT, }; static unsigned gfx10_CHCG_select1[] = { R_036F1C_CHCG_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_CHCG = { .gpu_block = CHCG, .name = "CHCG", .num_counters = 4, .select0 = gfx10_CHCG_select0, .select1 = gfx10_CHCG_select1, .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0xe, }; /* gfx10_CHC */ static unsigned gfx10_CHC_select0[] = { R_036F00_CHC_PERFCOUNTER0_SELECT, R_036F08_CHC_PERFCOUNTER1_SELECT, R_036F0C_CHC_PERFCOUNTER2_SELECT, R_036F10_CHC_PERFCOUNTER3_SELECT, }; static unsigned gfx10_CHC_select1[] = { R_036F04_CHC_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_CHC = { .gpu_block = CHC, .name = "CHC", .num_counters = 4, .select0 = gfx10_CHC_select0, .select1 = gfx10_CHC_select1, .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0xd, }; /* gfx10_DB */ static struct ac_pc_block_base gfx10_DB = { .gpu_block = DB, .name = "DB", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = cik_DB_select0, .select1 = cik_DB_select1, .counter0_lo = R_035100_DB_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x1, }; /* gfx10_GCR */ static unsigned gfx10_GCR_select0[] = { R_037580_GCR_PERFCOUNTER0_SELECT, R_037588_GCR_PERFCOUNTER1_SELECT, }; static unsigned gfx10_GCR_select1[] = { R_037584_GCR_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_GCR = { .gpu_block = GCR, .name = "GCR", .num_counters = 2, .select0 = gfx10_GCR_select0, .select1 = gfx10_GCR_select1, .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0x4, }; /* gfx10_GE */ static unsigned gfx10_GE_select0[] = { R_036200_GE_PERFCOUNTER0_SELECT, R_036208_GE_PERFCOUNTER1_SELECT, R_036210_GE_PERFCOUNTER2_SELECT, R_036218_GE_PERFCOUNTER3_SELECT, R_036220_GE_PERFCOUNTER4_SELECT, R_036228_GE_PERFCOUNTER5_SELECT, R_036230_GE_PERFCOUNTER6_SELECT, R_036238_GE_PERFCOUNTER7_SELECT, R_036240_GE_PERFCOUNTER8_SELECT, R_036248_GE_PERFCOUNTER9_SELECT, R_036250_GE_PERFCOUNTER10_SELECT, R_036258_GE_PERFCOUNTER11_SELECT, }; static unsigned gfx10_GE_select1[] = { R_036204_GE_PERFCOUNTER0_SELECT1, R_03620C_GE_PERFCOUNTER1_SELECT1, R_036214_GE_PERFCOUNTER2_SELECT1, R_03621C_GE_PERFCOUNTER3_SELECT1, }; static struct ac_pc_block_base gfx10_GE = { .gpu_block = GE, .name = "GE", .num_counters = 12, .select0 = gfx10_GE_select0, .select1 = gfx10_GE_select1, .counter0_lo = R_034200_GE_PERFCOUNTER0_LO, .num_spm_counters = 4, .num_spm_wires = 8, .spm_block_select = 0x6, }; /* gfx10_GL1A */ static unsigned gfx10_GL1A_select0[] = { R_037700_GL1A_PERFCOUNTER0_SELECT, R_037708_GL1A_PERFCOUNTER1_SELECT, R_03770C_GL1A_PERFCOUNTER2_SELECT, R_037710_GL1A_PERFCOUNTER3_SELECT, }; static unsigned gfx10_GL1A_select1[] = { R_037704_GL1A_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_GL1A = { .gpu_block = GL1A, .name = "GL1A", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = gfx10_GL1A_select0, .select1 = gfx10_GL1A_select1, .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0xa, }; /* gfx10_GL1C */ static unsigned gfx10_GL1C_select0[] = { R_036E80_GL1C_PERFCOUNTER0_SELECT, R_036E88_GL1C_PERFCOUNTER1_SELECT, R_036E8C_GL1C_PERFCOUNTER2_SELECT, R_036E90_GL1C_PERFCOUNTER3_SELECT, }; static unsigned gfx10_GL1C_select1[] = { R_036E84_GL1C_PERFCOUNTER0_SELECT1, }; static struct ac_pc_block_base gfx10_GL1C = { .gpu_block = GL1C, .name = "GL1C", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = gfx10_GL1C_select0, .select1 = gfx10_GL1C_select1, .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO, .num_spm_counters = 1, .num_spm_wires = 2, .spm_block_select = 0xc }; /* gfx10_GL2A */ static unsigned gfx10_GL2A_select0[] = { R_036E40_GL2A_PERFCOUNTER0_SELECT, R_036E48_GL2A_PERFCOUNTER1_SELECT, R_036E50_GL2A_PERFCOUNTER2_SELECT, R_036E54_GL2A_PERFCOUNTER3_SELECT, }; static unsigned gfx10_GL2A_select1[] = { R_036E44_GL2A_PERFCOUNTER0_SELECT1, R_036E4C_GL2A_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base gfx10_GL2A = { .gpu_block = GL2A, .name = "GL2A", .num_counters = 4, .select0 = gfx10_GL2A_select0, .select1 = gfx10_GL2A_select1, .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x7, }; /* gfx10_GL2C */ static unsigned gfx10_GL2C_select0[] = { R_036E00_GL2C_PERFCOUNTER0_SELECT, R_036E08_GL2C_PERFCOUNTER1_SELECT, R_036E10_GL2C_PERFCOUNTER2_SELECT, R_036E14_GL2C_PERFCOUNTER3_SELECT, }; static unsigned gfx10_GL2C_select1[] = { R_036E04_GL2C_PERFCOUNTER0_SELECT1, R_036E0C_GL2C_PERFCOUNTER1_SELECT1, }; static struct ac_pc_block_base gfx10_GL2C = { .gpu_block = GL2C, .name = "GL2C", .num_counters = 4, .select0 = gfx10_GL2C_select0, .select1 = gfx10_GL2C_select1, .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x8, }; /* gfx10_PA_PH */ static unsigned gfx10_PA_PH_select0[] = { R_037600_PA_PH_PERFCOUNTER0_SELECT, R_037608_PA_PH_PERFCOUNTER1_SELECT, R_03760C_PA_PH_PERFCOUNTER2_SELECT, R_037610_PA_PH_PERFCOUNTER3_SELECT, R_037614_PA_PH_PERFCOUNTER4_SELECT, R_037618_PA_PH_PERFCOUNTER5_SELECT, R_03761C_PA_PH_PERFCOUNTER6_SELECT, R_037620_PA_PH_PERFCOUNTER7_SELECT, }; static unsigned gfx10_PA_PH_select1[] = { R_037604_PA_PH_PERFCOUNTER0_SELECT1, R_037640_PA_PH_PERFCOUNTER1_SELECT1, R_037644_PA_PH_PERFCOUNTER2_SELECT1, R_037648_PA_PH_PERFCOUNTER3_SELECT1, }; static struct ac_pc_block_base gfx10_PA_PH = { .gpu_block = PA_PH, .name = "PA_PH", .num_counters = 8, .flags = AC_PC_BLOCK_SE, .select0 = gfx10_PA_PH_select0, .select1 = gfx10_PA_PH_select1, .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO, .num_spm_counters = 4, .num_spm_wires = 8, .spm_block_select = 0x5, }; /* gfx10_PA_SU */ static unsigned gfx10_PA_SU_select0[] = { R_036400_PA_SU_PERFCOUNTER0_SELECT, R_036408_PA_SU_PERFCOUNTER1_SELECT, R_036410_PA_SU_PERFCOUNTER2_SELECT, R_036418_PA_SU_PERFCOUNTER3_SELECT, }; static unsigned gfx10_PA_SU_select1[] = { R_036404_PA_SU_PERFCOUNTER0_SELECT1, R_03640C_PA_SU_PERFCOUNTER1_SELECT1, R_036414_PA_SU_PERFCOUNTER2_SELECT1, R_03641C_PA_SU_PERFCOUNTER3_SELECT1, }; static struct ac_pc_block_base gfx10_PA_SU = { .gpu_block = PA_SU, .name = "PA_SU", .num_counters = 4, .flags = AC_PC_BLOCK_SE, .select0 = gfx10_PA_SU_select0, .select1 = gfx10_PA_SU_select1, .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO, .num_spm_counters = 4, .num_spm_wires = 8, .spm_block_select = 0x2, }; /* gfx10_RLC */ static unsigned gfx10_RLC_select0[] = { R_037304_RLC_PERFCOUNTER0_SELECT, R_037308_RLC_PERFCOUNTER1_SELECT, }; static struct ac_pc_block_base gfx10_RLC = { .gpu_block = RLC, .name = "RLC", .num_counters = 2, .select0 = gfx10_RLC_select0, .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO, .num_spm_counters = 0, }; /* gfx10_RMI */ static unsigned gfx10_RMI_select0[] = { R_037400_RMI_PERFCOUNTER0_SELECT, R_037408_RMI_PERFCOUNTER1_SELECT, R_03740C_RMI_PERFCOUNTER2_SELECT, R_037414_RMI_PERFCOUNTER3_SELECT, }; static unsigned gfx10_RMI_select1[] = { R_037404_RMI_PERFCOUNTER0_SELECT1, R_037410_RMI_PERFCOUNTER2_SELECT1, }; static struct ac_pc_block_base gfx10_RMI = { .gpu_block = RMI, .name = "RMI", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS, .select0 = gfx10_RMI_select0, .select1 = gfx10_RMI_select1, .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 2, .spm_block_select = 0xb, }; /* gfx10_SQ */ static struct ac_pc_block_base gfx10_SQ = { .gpu_block = SQ, .name = "SQ", .num_counters = 16, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER, .select0 = cik_SQ_select0, .select_or = S_036700_SQC_BANK_MASK(15), .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO, .num_spm_wires = 16, .spm_block_select = 0x9, }; /* gfx10_TCP */ static struct ac_pc_block_base gfx10_TCP = { .gpu_block = TCP, .name = "TCP", .num_counters = 4, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = cik_TCP_select0, .select1 = cik_TCP_select1, .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO, .num_spm_counters = 2, .num_spm_wires = 4, .spm_block_select = 0x7, }; /* gfx10_UTCL1 */ static unsigned gfx10_UTCL1_select0[] = { R_03758C_UTCL1_PERFCOUNTER0_SELECT, R_037590_UTCL1_PERFCOUNTER1_SELECT, }; static struct ac_pc_block_base gfx10_UTCL1 = { .gpu_block = UTCL1, .name = "UTCL1", .num_counters = 2, .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED, .select0 = gfx10_UTCL1_select0, .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO, .num_spm_counters = 0, }; /* Both the number of instances and selectors varies between chips of the same * class. We only differentiate by class here and simply expose the maximum * number over all chips in a class. * * Unfortunately, GPUPerfStudio uses the order of performance counter groups * blindly once it believes it has identified the hardware, so the order of * blocks here matters. */ static struct ac_pc_block_gfxdescr groups_CIK[] = { {&cik_CB, 226}, {&cik_CPF, 17}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15}, {&cik_PA_SU, 153}, {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252}, {&cik_SX, 32}, {&cik_TA, 111}, {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55}, {&cik_TCP, 154}, {&cik_GDS, 121}, {&cik_VGT, 140}, {&cik_IA, 22}, {&cik_MC, 22}, {&cik_SRBM, 19}, {&cik_WD, 22}, {&cik_CPG, 46}, {&cik_CPC, 22}, }; static struct ac_pc_block_gfxdescr groups_VI[] = { {&cik_CB, 405}, {&cik_CPF, 19}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15}, {&cik_PA_SU, 154}, {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273}, {&cik_SX, 34}, {&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55}, {&cik_TCP, 180}, {&cik_GDS, 121}, {&cik_VGT, 147}, {&cik_IA, 24}, {&cik_MC, 22}, {&cik_SRBM, 27}, {&cik_WD, 37}, {&cik_CPG, 48}, {&cik_CPC, 24}, }; static struct ac_pc_block_gfxdescr groups_gfx9[] = { {&cik_CB, 438}, {&cik_CPF, 32}, {&cik_DB, 328}, {&cik_GRBM, 38}, {&cik_GRBMSE, 16}, {&cik_PA_SU, 292}, {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374}, {&cik_SX, 208}, {&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57}, {&cik_TCP, 85}, {&cik_GDS, 121}, {&cik_VGT, 148}, {&cik_IA, 32}, {&cik_WD, 58}, {&cik_CPG, 59}, {&cik_CPC, 35}, }; static struct ac_pc_block_gfxdescr groups_gfx10[] = { {&cik_CB, 461}, {&gfx10_CHA, 45}, {&gfx10_CHCG, 35}, {&gfx10_CHC, 35}, {&cik_CPC, 47}, {&cik_CPF, 40}, {&cik_CPG, 82}, {&gfx10_DB, 370}, {&gfx10_GCR, 94}, {&cik_GDS, 123}, {&gfx10_GE, 315}, {&gfx10_GL1A, 36}, {&gfx10_GL1C, 64}, {&gfx10_GL2A, 91}, {&gfx10_GL2C, 235}, {&cik_GRBM, 47}, {&cik_GRBMSE, 19}, {&gfx10_PA_PH, 960}, {&cik_PA_SC, 552}, {&gfx10_PA_SU, 266}, {&gfx10_RLC, 7}, {&gfx10_RMI, 258}, {&cik_SPI, 329}, {&gfx10_SQ, 509}, {&cik_SX, 225}, {&cik_TA, 226}, {&gfx10_TCP, 77}, {&cik_TD, 61}, {&gfx10_UTCL1, 15}, }; struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc, unsigned index, unsigned *base_gid, unsigned *sub_index) { struct ac_pc_block *block = pc->blocks; unsigned bid; *base_gid = 0; for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { unsigned total = block->num_groups * block->b->selectors; if (index < total) { *sub_index = index; return block; } index -= total; *base_gid += block->num_groups; } return NULL; } struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc, unsigned *index) { unsigned bid; struct ac_pc_block *block = pc->blocks; for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { if (*index < block->num_groups) return block; *index -= block->num_groups; } return NULL; } bool ac_init_block_names(const struct radeon_info *info, const struct ac_perfcounters *pc, struct ac_pc_block *block) { bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block); bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block); unsigned i, j, k; unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; unsigned namelen; char *groupname; char *p; if (per_instance_groups) groups_instance = block->num_instances; if (per_se_groups) groups_se = info->max_se; if (block->b->b->flags & AC_PC_BLOCK_SHADER) groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits); namelen = strlen(block->b->b->name); block->group_name_stride = namelen + 1; if (block->b->b->flags & AC_PC_BLOCK_SHADER) block->group_name_stride += 3; if (per_se_groups) { assert(groups_se <= 10); block->group_name_stride += 1; if (per_instance_groups) block->group_name_stride += 1; } if (per_instance_groups) { assert(groups_instance <= 100); block->group_name_stride += 2; } block->group_names = MALLOC(block->num_groups * block->group_name_stride); if (!block->group_names) return false; groupname = block->group_names; for (i = 0; i < groups_shader; ++i) { const char *shader_suffix = ac_pc_shader_type_suffixes[i]; unsigned shaderlen = strlen(shader_suffix); for (j = 0; j < groups_se; ++j) { for (k = 0; k < groups_instance; ++k) { strcpy(groupname, block->b->b->name); p = groupname + namelen; if (block->b->b->flags & AC_PC_BLOCK_SHADER) { strcpy(p, shader_suffix); p += shaderlen; } if (per_se_groups) { p += sprintf(p, "%d", j); if (per_instance_groups) *p++ = '_'; } if (per_instance_groups) p += sprintf(p, "%d", k); groupname += block->group_name_stride; } } } assert(block->b->selectors <= 1000); block->selector_name_stride = block->group_name_stride + 4; block->selector_names = MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride); if (!block->selector_names) return false; groupname = block->group_names; p = block->selector_names; for (i = 0; i < block->num_groups; ++i) { for (j = 0; j < block->b->selectors; ++j) { sprintf(p, "%s_%03d", groupname, j); p += block->selector_name_stride; } groupname += block->group_name_stride; } return true; } bool ac_init_perfcounters(const struct radeon_info *info, bool separate_se, bool separate_instance, struct ac_perfcounters *pc) { const struct ac_pc_block_gfxdescr *blocks; unsigned num_blocks; switch (info->chip_class) { case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; case GFX9: blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; case GFX10: case GFX10_3: blocks = groups_gfx10; num_blocks = ARRAY_SIZE(groups_gfx10); break; case GFX6: default: return false; /* not implemented */ } pc->separate_se = separate_se; pc->separate_instance = separate_instance; pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block)); if (!pc->blocks) return false; pc->num_blocks = num_blocks; for (unsigned i = 0; i < num_blocks; i++) { struct ac_pc_block *block = &pc->blocks[i]; block->b = &blocks[i]; block->num_instances = MAX2(1, block->b->instances); if (!strcmp(block->b->b->name, "CB") || !strcmp(block->b->b->name, "DB") || !strcmp(block->b->b->name, "RMI")) block->num_instances = info->max_se; else if (!strcmp(block->b->b->name, "TCC")) block->num_instances = info->max_tcc_blocks; else if (!strcmp(block->b->b->name, "IA")) block->num_instances = MAX2(1, info->max_se / 2); else if (!strcmp(block->b->b->name, "TA") || !strcmp(block->b->b->name, "TCP") || !strcmp(block->b->b->name, "TD")) { block->num_instances = MAX2(1, info->max_good_cu_per_sa); } if (ac_pc_block_has_per_instance_groups(pc, block)) { block->num_groups = block->num_instances; } else { block->num_groups = 1; } if (ac_pc_block_has_per_se_groups(pc, block)) block->num_groups *= info->max_se; if (block->b->b->flags & AC_PC_BLOCK_SHADER) block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits); pc->num_groups += block->num_groups; } return true; } void ac_destroy_perfcounters(struct ac_perfcounters *pc) { if (!pc) return; for (unsigned i = 0; i < pc->num_blocks; ++i) { FREE(pc->blocks[i].group_names); FREE(pc->blocks[i].selector_names); } FREE(pc->blocks); }